merge: Resolved conflicts
r1266:6139e442 merge v5.1.0 stable
@@ -0,0 +1,79 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import logging
20
21 from .backends.fanout_cache import FileSystemFanoutCache
22 from .backends.objectstore_cache import ObjectStoreCache
23
24 from .utils import archive_iterator # noqa
25 from .lock import ArchiveCacheGenerationLock # noqa
26
27 log = logging.getLogger(__name__)
28
29
30 cache_meta = None
31
32
33 def includeme(config):
34 return # vcsserver gets its config from rhodecode on a remote call
35 # init our cache at start
36 settings = config.get_settings()
37 get_archival_cache_store(settings)
38
39
40 def get_archival_config(config):
41
42 final_config = {}
45
46 for k, v in config.items():
47 if k.startswith('archive_cache'):
48 final_config[k] = v
49
50 return final_config
51
52
53 def get_archival_cache_store(config, always_init=False):
54
55 global cache_meta
56 if cache_meta is not None and not always_init:
57 return cache_meta
58
59 config = get_archival_config(config)
60 backend = config['archive_cache.backend.type']
61
62 archive_cache_locking_url = config['archive_cache.locking.url']
63
64 match backend:
65 case 'filesystem':
66 d_cache = FileSystemFanoutCache(
67 locking_url=archive_cache_locking_url,
68 **config
69 )
70 case 'objectstore':
71 d_cache = ObjectStoreCache(
72 locking_url=archive_cache_locking_url,
73 **config
74 )
75 case _:
76 raise ValueError(f'archive_cache.backend.type only supports "filesystem" or "objectstore", got: {backend}')
77
78 cache_meta = d_cache
79 return cache_meta
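
Taken together, a caller initializes the cache once from flat settings. Below is a hedged usage sketch; the import path and every settings value are assumptions (placeholders), and it presumes a reachable Redis instance behind the locking url:

    # hypothetical usage; values are placeholders, import path assumed
    from vcsserver.lib.archive_cache import get_archival_cache_store

    settings = {
        'archive_cache.backend.type': 'filesystem',
        'archive_cache.locking.url': 'redis://redis:6379/1',
        'archive_cache.filesystem.store_dir': '/tmp/archive_cache',
        'archive_cache.filesystem.cache_shards': '8',
        'archive_cache.filesystem.cache_size_gb': '10',
        'archive_cache.filesystem.eviction_policy': 'least-recently-stored',
        'archive_cache.filesystem.retry': 'false',
        'archive_cache.filesystem.retry_backoff': '1',
        'archive_cache.filesystem.retry_attempts': '0',
    }
    d_cache = get_archival_cache_store(settings)  # memoized in module-level cache_meta
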
@@ -0,0 +1,17 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
@@ -0,0 +1,372 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import os
20 import functools
21 import logging
22 import typing
23 import time
24 import zlib
25
26 from ...ext_json import json
27 from ..utils import StatsDB, NOT_GIVEN, ShardFileReader, EVICTION_POLICY, format_size
28 from ..lock import GenerationLock
29
30 log = logging.getLogger(__name__)
31
32
33 class BaseShard:
34 storage_type: str = ''
35 fs = None
36
37 @classmethod
38 def hash(cls, key):
39 """Compute portable hash for `key`.
40
41 :param key: key to hash
42 :return: hash value
43
44 """
45 mask = 0xFFFFFFFF
46 return zlib.adler32(key.encode('utf-8')) & mask # noqa
47
48 def _write_file(self, full_path, read_iterator, mode):
49 raise NotImplementedError
50
51 def _get_keyfile(self, key):
52 raise NotImplementedError
53
54 def random_filename(self):
55 raise NotImplementedError
56
57 def store(self, *args, **kwargs):
58 raise NotImplementedError
59
60 def _store(self, key, value_reader, metadata, mode):
61 (filename, # hash-name
62 full_path # full-path/hash-name
63 ) = self.random_filename()
64
65 key_file, key_file_path = self._get_keyfile(key)
66
67 # STORE METADATA
68 _metadata = {
69 "version": "v1",
70
71 "key_file": key_file, # this is the .key.json file storing meta
72 "key_file_path": key_file_path, # full path to key_file
73 "archive_key": key, # original name we stored archive under, e.g my-archive.zip
74 "archive_filename": filename, # the actual filename we stored that file under
75 "archive_full_path": full_path,
76
77 "store_time": time.time(),
78 "access_count": 0,
79 "access_time": 0,
80
81 "size": 0
82 }
83 if metadata:
84 _metadata.update(metadata)
85
86 read_iterator = iter(functools.partial(value_reader.read, 2**22), b'')
87 size, sha256 = self._write_file(full_path, read_iterator, mode)
88 _metadata['size'] = size
89 _metadata['sha256'] = sha256
90
91 # once the archive is written, create a key file that records the presence of the binary file
92 with self.fs.open(key_file_path, 'wb') as f:
93 f.write(json.dumps(_metadata))
94
95 return key, filename, size, _metadata
96
97 def fetch(self, *args, **kwargs):
98 raise NotImplementedError
99
100 def _fetch(self, key, retry, retry_attempts, retry_backoff,
101 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
102 if retry is NOT_GIVEN:
103 retry = False
104 if retry_attempts is NOT_GIVEN:
105 retry_attempts = 0
106
107 if retry and retry_attempts > 0:
108 for attempt in range(1, retry_attempts + 1):
109 if key in self:
110 break
111 # key not found yet; wait retry_backoff seconds, then re-check
112 time.sleep(retry_backoff)
113
114 if key not in self:
115 log.error(f'requested key={key} not found in {self} retry={retry}, attempts={retry_attempts}')
116 raise KeyError(key)
117
118 key_file, key_file_path = self._get_keyfile(key)
119 with self.fs.open(key_file_path, 'rb') as f:
120 metadata = json.loads(f.read())
121
122 archive_path = metadata['archive_full_path']
123 if presigned_url_expires and presigned_url_expires > 0:
124 metadata['url'] = self.fs.url(archive_path, expires=presigned_url_expires)
125
126 try:
127 return ShardFileReader(self.fs.open(archive_path, 'rb')), metadata
128 finally:
129 # update usage stats, count and accessed
130 metadata["access_count"] = metadata.get("access_count", 0) + 1
131 metadata["access_time"] = time.time()
132 log.debug('Updated %s with access snapshot, access_count=%s access_time=%s',
133 key_file, metadata['access_count'], metadata['access_time'])
134 with self.fs.open(key_file_path, 'wb') as f:
135 f.write(json.dumps(metadata))
136
137 def remove(self, *args, **kwargs):
138 raise NotImplementedError
139
140 def _remove(self, key):
141 if key not in self:
142 log.error(f'requested key={key} not found in {self}')
143 raise KeyError(key)
144
145 key_file, key_file_path = self._get_keyfile(key)
146 with self.fs.open(key_file_path, 'rb') as f:
147 metadata = json.loads(f.read())
148
149 archive_path = metadata['archive_full_path']
150 self.fs.rm(archive_path)
151 self.fs.rm(key_file_path)
152 return 1
153
154 @property
155 def storage_medium(self):
156 return getattr(self, self.storage_type)
157
158 @property
159 def key_suffix(self):
160 return 'key.json'
161
162 def __contains__(self, key):
163 """Return `True` if `key` matching item is found in cache.
164
165 :param key: key matching item
166 :return: True if key matching item
167
168 """
169 key_file, key_file_path = self._get_keyfile(key)
170 return self.fs.exists(key_file_path)
171
172
173 class BaseCache:
174 _locking_url: str = ''
175 _storage_path: str = ''
176 _config: dict = {}
177 retry = False
178 retry_attempts: int = 0
179 retry_backoff: int | float = 1
180 _shards = tuple()
181 shard_cls = BaseShard
182 # define the presigned url expiration, 0 == disabled
183 presigned_url_expires: int = 0
184
185 def __contains__(self, key):
186 """Return `True` if `key` matching item is found in cache.
187
188 :param key: key matching item
189 :return: True if key matching item
190
191 """
192 return self.has_key(key)
193
194 def __repr__(self):
195 return f'<{self.__class__.__name__}(storage={self._storage_path})>'
196
197 @classmethod
198 def gb_to_bytes(cls, gb):
199 return gb * (1024 ** 3)
200
201 @property
202 def storage_path(self):
203 return self._storage_path
204
205 @classmethod
206 def get_stats_db(cls):
207 return StatsDB()
208
209 def get_conf(self, key, pop=False):
210 if key not in self._config:
211 raise ValueError(f"No configuration key '{key}', please make sure it exists in archive_cache config")
212 val = self._config[key]
213 if pop:
214 del self._config[key]
215 return val
216
217 def _get_shard(self, key) -> shard_cls:
218 index = self._hash(key) % self._shard_count
219 shard = self._shards[index]
220 return shard
221
222 def _get_size(self, shard, archive_path):
223 raise NotImplementedError
224
225 def store(self, key, value_reader, metadata=None):
226 shard = self._get_shard(key)
227 return shard.store(key, value_reader, metadata)
228
229 def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN) -> tuple[typing.BinaryIO, dict]:
230 """
231 Return file handle corresponding to `key` from specific shard cache.
232 """
233 if retry is NOT_GIVEN:
234 retry = self.retry
235 if retry_attempts is NOT_GIVEN:
236 retry_attempts = self.retry_attempts
237 retry_backoff = self.retry_backoff
238 presigned_url_expires = self.presigned_url_expires
239
240 shard = self._get_shard(key)
241 return shard.fetch(key, retry=retry,
242 retry_attempts=retry_attempts,
243 retry_backoff=retry_backoff,
244 presigned_url_expires=presigned_url_expires)
245
246 def remove(self, key):
247 shard = self._get_shard(key)
248 return shard.remove(key)
249
250 def has_key(self, archive_key):
251 """Return `True` if `key` matching item is found in cache.
252
253 :param archive_key: key for the item; a unique archive name we store the data under, e.g. my-archive-svn.zip
254 :return: True if key is found
255
256 """
257 shard = self._get_shard(archive_key)
258 return archive_key in shard
259
260 def iter_keys(self):
261 for shard in self._shards:
262 if shard.fs.exists(shard.storage_medium):
263 for path, _dirs, _files in shard.fs.walk(shard.storage_medium):
264 for key_file_path in _files:
265 if key_file_path.endswith(shard.key_suffix):
266 yield shard, key_file_path
267
268 def get_lock(self, lock_key):
269 return GenerationLock(lock_key, self._locking_url)
270
271 def evict(self, policy=None, size_limit=None) -> dict:
272 """
273 Remove old items based on the configured eviction policy and size limit.
274
275
276 Explanation of this algorithm:
277 iterate over each shard, and for each shard iterate over its .key files,
278 reading the metadata they store. This gives us the full list of keys, cached
279 archives, their sizes, creation times, access times, and access counts.
280
281 That data is loaded into an in-memory DB so different sorting strategies can
282 be run easily; summing the sizes is a single SQL SUM query.
283
284 Then a sorting strategy is applied according to the eviction policy, and we
285 iterate over the sorted keys, removing each one until the overall size limit is met.
286 """
287 removal_info = {
288 "removed_items": 0,
289 "removed_size": 0
290 }
291 policy = policy or self._eviction_policy
292 size_limit = size_limit or self._cache_size_limit
293
294 select_policy = EVICTION_POLICY[policy]['evict']
295
296 log.debug('Running eviction policy \'%s\', and checking for size limit: %s',
297 policy, format_size(size_limit))
298
299 if select_policy is None:
300 return removal_info
301
302 db = self.get_stats_db()
303
304 data = []
305 cnt = 1
306
307 for shard, key_file in self.iter_keys():
308 with shard.fs.open(os.path.join(shard.storage_medium, key_file), 'rb') as f:
309 metadata = json.loads(f.read())
310
311 key_file_path = os.path.join(shard.storage_medium, key_file)
312
313 archive_key = metadata['archive_key']
314 archive_path = metadata['archive_full_path']
315
316 size = metadata.get('size')
317 if not size:
318 # size missing from metadata, re-calculate it
319 size = self._get_size(shard, archive_path)
320
321 data.append([
322 cnt,
323 key_file,
324 key_file_path,
325 archive_key,
326 archive_path,
327 metadata.get('store_time', 0),
328 metadata.get('access_time', 0),
329 metadata.get('access_count', 0),
330 size,
331 ])
332 cnt += 1
333
334 # Insert bulk data using executemany
335 db.bulk_insert(data)
336
337 total_size = db.get_total_size()
338 log.debug('Analyzed %s keys, occupying: %s, running eviction to match %s',
339 len(data), format_size(total_size), format_size(size_limit))
340
341 removed_items = 0
342 removed_size = 0
343 for key_file, archive_key, size in db.get_sorted_keys(select_policy):
344 # simulate removal impact BEFORE removal
345 total_size -= size
346
347 if total_size <= size_limit:
348 # total size is now within the limit, stop evicting
349 break
350
351 self.remove(archive_key)
352 removed_items += 1
353 removed_size += size
354 removal_info['removed_items'] = removed_items
355 removal_info['removed_size'] = removed_size
356 log.debug('Removed %s cache archives, and reduced size by: %s',
357 removed_items, format_size(removed_size))
358 return removal_info
359
360 def get_statistics(self):
361 total_files = 0
362 total_size = 0
363 meta = {}
364
365 for shard, key_file in self.iter_keys():
366 json_key = f"{shard.storage_medium}/{key_file}"
367 with shard.fs.open(json_key, 'rb') as f:
368 total_files += 1
369 metadata = json.loads(f.read())
370 total_size += metadata['size']
371
372 return total_files, total_size, meta
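
For orientation, this is roughly the store/fetch round trip the base classes define. A hedged sketch: `d_cache` stands for any configured cache instance and the payload bytes are made up:

    import io

    # illustrative only; d_cache is a configured FileSystemFanoutCache or
    # ObjectStoreCache instance, the bytes stand in for a real archive
    reader = io.BytesIO(b'generated-archive-bytes')
    key, stored_name, size, meta = d_cache.store('my-archive.zip', reader)

    if 'my-archive.zip' in d_cache:                 # checks the shard's key file
        fh, meta = d_cache.fetch('my-archive.zip')  # bumps access_count/access_time
        data = fh.read()
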
@@ -0,0 +1,177 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import codecs
20 import hashlib
21 import logging
22 import os
23 import typing
24
25 import fsspec
26
27 from .base import BaseCache, BaseShard
28 from ..utils import ShardFileReader, NOT_GIVEN
29 from ...type_utils import str2bool
30
31 log = logging.getLogger(__name__)
32
33
34 class FileSystemShard(BaseShard):
35
36 def __init__(self, index, directory, directory_folder, fs, **settings):
37 self._index: int = index
38 self._directory: str = directory
39 self._directory_folder: str = directory_folder
40 self.storage_type: str = 'directory'
41
42 self.fs = fs
43
44 @property
45 def directory(self) -> str:
46 """Cache directory final path."""
47 return os.path.join(self._directory, self._directory_folder)
48
49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 key_file: str = f'{archive_key}.{self.key_suffix}'
51 return key_file, os.path.join(self.directory, key_file)
52
53 def _get_writer(self, path, mode):
54 for count in range(1, 11):
55 try:
56 # Another cache may have deleted the directory before
57 # the file could be opened.
58 return self.fs.open(path, mode)
59 except OSError:
60 if count == 10:
61 # Give up after 10 tries to open the file.
62 raise
63 continue
64
65 def _write_file(self, full_path, iterator, mode):
66
67 # ensure dir exists
68 destination, _ = os.path.split(full_path)
69 if not self.fs.exists(destination):
70 self.fs.makedirs(destination)
71
72 writer = self._get_writer(full_path, mode)
73
74 digest = hashlib.sha256()
75 with writer:
76 size = 0
77 for chunk in iterator:
78 size += len(chunk)
79 digest.update(chunk)
80 writer.write(chunk)
81 writer.flush()
82 # Get the file descriptor
83 fd = writer.fileno()
84
85 # Sync the file descriptor to disk, helps with NFS cases...
86 os.fsync(fd)
87 sha256 = digest.hexdigest()
88 log.debug('wrote new archive cache file under %s, sha256: %s', full_path, sha256)
89 return size, sha256
90
91 def store(self, key, value_reader, metadata: dict | None = None):
92 return self._store(key, value_reader, metadata, mode='xb')
93
94 def fetch(self, key, retry=NOT_GIVEN,
95 retry_attempts=NOT_GIVEN, retry_backoff=1, **kwargs) -> tuple[ShardFileReader, dict]:
96 return self._fetch(key, retry, retry_attempts, retry_backoff)
97
98 def remove(self, key):
99 return self._remove(key)
100
101 def random_filename(self):
102 """Return filename and full-path tuple for file storage.
103
104 Filename will be a randomly generated 28 character hexadecimal string
105 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
106 reduce the size of directories. On older filesystems, lookups in
107 directories with many files may be slow.
108 """
109
110 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
111
112 archive_name = hex_name[4:] + '.archive_cache'
113 filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
114
115 full_path = os.path.join(self.directory, filename)
116 return archive_name, full_path
117
118 def __repr__(self):
119 return f'{self.__class__.__name__}(index={self._index}, dir={self.directory})'
120
121
122 class FileSystemFanoutCache(BaseCache):
123 shard_name: str = 'shard_{:03d}'
124 shard_cls = FileSystemShard
125
126 def __init__(self, locking_url, **settings):
127 """
128 Initialize file system cache instance.
129
130 :param str locking_url: redis url for a lock
131 :param settings: settings dict
132
133 """
134 self._locking_url = locking_url
135 self._config = settings
136 cache_dir = self.get_conf('archive_cache.filesystem.store_dir')
137 directory = str(cache_dir)
138 directory = os.path.expanduser(directory)
139 directory = os.path.expandvars(directory)
140 self._directory = directory
141 self._storage_path = directory # common path for all from BaseCache
142
143 self._shard_count = int(self.get_conf('archive_cache.filesystem.cache_shards', pop=True))
144 if self._shard_count < 1:
145 raise ValueError('cache_shards must be 1 or more')
146
147 self._eviction_policy = self.get_conf('archive_cache.filesystem.eviction_policy', pop=True)
148 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.filesystem.cache_size_gb')))
149
150 self.retry = str2bool(self.get_conf('archive_cache.filesystem.retry', pop=True))
151 self.retry_attempts = int(self.get_conf('archive_cache.filesystem.retry_attempts', pop=True))
152 self.retry_backoff = int(self.get_conf('archive_cache.filesystem.retry_backoff', pop=True))
153
154 log.debug('Initializing %s archival cache instance', self)
155 fs = fsspec.filesystem('file')
156 # check if it's ok to write, and create the archive cache main dir
157 # if it doesn't exist; shard sub-directories are created beneath it
161 if not fs.exists(self._directory):
162 fs.makedirs(self._directory, exist_ok=True)
163
164 self._shards = tuple(
165 self.shard_cls(
166 index=num,
167 directory=directory,
168 directory_folder=self.shard_name.format(num),
169 fs=fs,
170 **settings,
171 )
172 for num in range(self._shard_count)
173 )
174 self._hash = self._shards[0].hash
175
176 def _get_size(self, shard, archive_path):
177 return os.stat(archive_path).st_size
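
To make the fanout layout concrete, here is a stand-alone restatement of the naming scheme random_filename() uses; the example path is hypothetical:

    import codecs
    import os

    hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')  # 32 hex chars
    archive_name = hex_name[4:] + '.archive_cache'                   # 28-char filename
    filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
    # e.g. 'a1/b2/c3d4....archive_cache', stored under <store_dir>/shard_000/
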
@@ -0,0 +1,173 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import codecs
20 import hashlib
21 import logging
22 import os
23 import typing
24
25 import fsspec
26
27 from .base import BaseCache, BaseShard
28 from ..utils import ShardFileReader, NOT_GIVEN
29 from ...type_utils import str2bool
30
31 log = logging.getLogger(__name__)
32
33
34 class S3Shard(BaseShard):
35
36 def __init__(self, index, bucket, bucket_folder, fs, **settings):
37 self._index: int = index
38 self._bucket_folder: str = bucket_folder
39 self.storage_type: str = 'bucket'
40 self._bucket_main: str = bucket
41
42 self.fs = fs
43
44 @property
45 def bucket(self) -> str:
46 """Cache bucket final path."""
47 return os.path.join(self._bucket_main, self._bucket_folder)
48
49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 key_file: str = f'{archive_key}-{self.key_suffix}'
51 return key_file, os.path.join(self.bucket, key_file)
52
53 def _get_writer(self, path, mode):
54 return self.fs.open(path, 'wb')
55
56 def _write_file(self, full_path, iterator, mode):
57
58 # ensure folder in bucket exists
59 destination = self.bucket
60 if not self.fs.exists(destination):
61 self.fs.mkdir(destination, s3_additional_kwargs={})
62
63 writer = self._get_writer(full_path, mode)
64
65 digest = hashlib.sha256()
66 with writer:
67 size = 0
68 for chunk in iterator:
69 size += len(chunk)
70 digest.update(chunk)
71 writer.write(chunk)
72
73 sha256 = digest.hexdigest()
74 log.debug('wrote new archive cache file under %s, sha256: %s', full_path, sha256)
75 return size, sha256
76
77 def store(self, key, value_reader, metadata: dict | None = None):
78 return self._store(key, value_reader, metadata, mode='wb')
79
80 def fetch(self, key, retry=NOT_GIVEN,
81 retry_attempts=NOT_GIVEN, retry_backoff=1,
82 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
83 return self._fetch(key, retry, retry_attempts, retry_backoff, presigned_url_expires=presigned_url_expires)
84
85 def remove(self, key):
86 return self._remove(key)
87
88 def random_filename(self):
89 """Return filename and full-path tuple for file storage.
90
91 Filename will be a randomly generated 28 character hexadecimal string
92 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
93 reduce the size of directories. On older filesystems, lookups in
94 directories with many files may be slow.
95 """
96
97 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
98
99 archive_name = hex_name[4:] + '.archive_cache'
100 filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
101
102 full_path = os.path.join(self.bucket, filename)
103 return archive_name, full_path
104
105 def __repr__(self):
106 return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
107
108
109 class ObjectStoreCache(BaseCache):
110 shard_name: str = 'shard-{:03d}'
111 shard_cls = S3Shard
112
113 def __init__(self, locking_url, **settings):
114 """
115 Initialize objectstore cache instance.
116
117 :param str locking_url: redis url for a lock
118 :param settings: settings dict
119
120 """
121 self._locking_url = locking_url
122 self._config = settings
123
124 objectstore_url = self.get_conf('archive_cache.objectstore.url')
125 self._storage_path = objectstore_url # common path for all from BaseCache
126
127 self._shard_count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
128 if self._shard_count < 1:
129 raise ValueError('cache_shards must be 1 or more')
130
131 self._bucket = settings.pop('archive_cache.objectstore.bucket')
132 if not self._bucket:
133 raise ValueError('archive_cache.objectstore.bucket needs to have a value')
134
135 self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
136 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
137
138 self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
139 self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
140 self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
141
142 endpoint_url = settings.pop('archive_cache.objectstore.url')
143 key = settings.pop('archive_cache.objectstore.key')
144 secret = settings.pop('archive_cache.objectstore.secret')
145 region = settings.pop('archive_cache.objectstore.region')
146
147 log.debug('Initializing %s archival cache instance', self)
148
149 fs = fsspec.filesystem(
150 's3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret, client_kwargs={'region_name': region}
151 )
152
153 # init main bucket
154 if not fs.exists(self._bucket):
155 fs.mkdir(self._bucket)
156
157 self._shards = tuple(
158 self.shard_cls(
159 index=num,
160 bucket=self._bucket,
161 bucket_folder=self.shard_name.format(num),
162 fs=fs,
163 **settings,
164 )
165 for num in range(self._shard_count)
166 )
167 self._hash = self._shards[0].hash
168
169 def _get_size(self, shard, archive_path):
170 return shard.fs.info(archive_path)['size']
171
172 def set_presigned_url_expiry(self, val: int) -> None:
173 self.presigned_url_expires = val
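
A hedged configuration sketch for the objectstore backend; the endpoint, bucket, and credentials are placeholders, not working values:

    # every value below is a placeholder
    settings = {
        'archive_cache.objectstore.url': 'http://s3-endpoint:9000',
        'archive_cache.objectstore.key': 'placeholder-key',
        'archive_cache.objectstore.secret': 'placeholder-secret',
        'archive_cache.objectstore.region': 'eu-central-1',
        'archive_cache.objectstore.bucket': 'rhodecode-archive-cache',
        'archive_cache.objectstore.bucket_shards': '8',
        'archive_cache.objectstore.cache_size_gb': '10',
        'archive_cache.objectstore.eviction_policy': 'least-recently-stored',
        'archive_cache.objectstore.retry': 'false',
        'archive_cache.objectstore.retry_backoff': '1',
        'archive_cache.objectstore.retry_attempts': '0',
    }
    cache = ObjectStoreCache('redis://redis:6379/1', **settings)
    cache.set_presigned_url_expiry(3600)  # 0 disables presigned urls, N>0 is expiry in seconds
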
@@ -0,0 +1,62 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import redis
20 from .._vendor import redis_lock
21
22
23 class ArchiveCacheGenerationLock(Exception):
24 pass
25
26
27 class GenerationLock:
28 """
29 Locking mechanism that detects if a lock is acquired
30
31 with GenerationLock(lock_key):
32 compute_archive()
33 """
34 lock_timeout = 7200
35
36 def __init__(self, lock_key, url):
37 self.lock_key = lock_key
38 self._create_client(url)
39 self.lock = self.get_lock()
40
41 def _create_client(self, url):
42 connection_pool = redis.ConnectionPool.from_url(url)
43 self.writer_client = redis.StrictRedis(
44 connection_pool=connection_pool
45 )
46 self.reader_client = self.writer_client
47
48 def get_lock(self):
49 return redis_lock.Lock(
50 redis_client=self.writer_client,
51 name=self.lock_key,
52 expire=self.lock_timeout,
53 strict=True
54 )
55
56 def __enter__(self):
57 acquired = self.lock.acquire(blocking=False)
58 if not acquired:
59 raise ArchiveCacheGenerationLock('Failed to create a lock')
60
61 def __exit__(self, exc_type, exc_val, exc_tb):
62 self.lock.release()
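
A brief usage sketch: the acquire is non-blocking, so a second worker generating the same archive gets ArchiveCacheGenerationLock immediately and can back off. The lock key and url below are placeholders, and compute_archive() is the docstring's stand-in for the real work:

    try:
        with GenerationLock('archive-gen:my-archive.zip', 'redis://redis:6379/1'):
            compute_archive()  # placeholder for the actual archive generation
    except ArchiveCacheGenerationLock:
        # another worker holds the lock for this key; retry later or poll
        # the cache for the finished archive
        pass
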
@@ -0,0 +1,134 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import sqlite3
20 import s3fs.core
21
22 NOT_GIVEN = -917
23
24
25 EVICTION_POLICY = {
26 'none': {
27 'evict': None,
28 },
29 'least-recently-stored': {
30 'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
31 },
32 'least-recently-used': {
33 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
34 },
35 'least-frequently-used': {
36 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
37 },
38 }
39
40
41 def archive_iterator(_reader, block_size: int = 4096 * 512):
42 # 4096 * 512 = 2MB
43 while 1:
44 data = _reader.read(block_size)
45 if not data:
46 break
47 yield data
48
49
50 def format_size(size):
51 # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
52 for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
53 if size < 1024:
54 return f"{size:.2f} {unit}"
55 size /= 1024
56
57
58 class StatsDB:
59
60 def __init__(self):
61 self.connection = sqlite3.connect(':memory:')
62 self._init_db()
63
64 def _init_db(self):
65 qry = '''
66 CREATE TABLE IF NOT EXISTS archive_cache (
67 rowid INTEGER PRIMARY KEY,
68 key_file TEXT,
69 key_file_path TEXT,
70 archive_key TEXT,
71 archive_path TEXT,
72 store_time REAL,
73 access_time REAL,
74 access_count INTEGER DEFAULT 0,
75 size INTEGER DEFAULT 0
76 )
77 '''
78
79 self.sql(qry)
80 self.connection.commit()
81
82 @property
83 def sql(self):
84 return self.connection.execute
85
86 def bulk_insert(self, rows):
87 qry = '''
88 INSERT INTO archive_cache (
89 rowid,
90 key_file,
91 key_file_path,
92 archive_key,
93 archive_path,
94 store_time,
95 access_time,
96 access_count,
97 size
98 )
99 VALUES (
100 ?, ?, ?, ?, ?, ?, ?, ?, ?
101 )
102 '''
103 cursor = self.connection.cursor()
104 cursor.executemany(qry, rows)
105 self.connection.commit()
106
107 def get_total_size(self):
108 qry = 'SELECT COALESCE(SUM(size), 0) FROM archive_cache'
109 ((total_size,),) = self.sql(qry).fetchall()
110 return total_size
111
112 def get_sorted_keys(self, select_policy):
113 select_policy_qry = select_policy.format(fields='key_file, archive_key, size')
114 return self.sql(select_policy_qry).fetchall()
115
116
117 class ShardFileReader:
118
119 def __init__(self, file_like_reader):
120 self._file_like_reader = file_like_reader
121
122 def __getattr__(self, item):
123 if isinstance(self._file_like_reader, s3fs.core.S3File):
124 match item:
125 case 'name':
126 # S3 file objects don't support the name attribute we rely on; use full_name instead
127 return self._file_like_reader.full_name
128 case _:
129 return getattr(self._file_like_reader, item)
130 else:
131 return getattr(self._file_like_reader, item)
132
133 def __repr__(self):
134 return f'<{self.__class__.__name__}={self._file_like_reader}>'
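
To show how these helpers fit together, a hedged sketch of what BaseCache.evict() does with StatsDB and EVICTION_POLICY; the rows are made-up values:

    db = StatsDB()
    db.bulk_insert([
        # rowid, key_file, key_file_path, archive_key, archive_path,
        # store_time, access_time, access_count, size
        (1, 'a.zip.key.json', '/cache/a.zip.key.json', 'a.zip', '/cache/a.bin',
         1000.0, 1010.0, 3, 2048),
        (2, 'b.zip.key.json', '/cache/b.zip.key.json', 'b.zip', '/cache/b.bin',
         1005.0, 1001.0, 9, 4096),
    ])
    select_policy = EVICTION_POLICY['least-recently-used']['evict']
    # formats to: SELECT key_file, archive_key, size FROM archive_cache ORDER BY access_time
    for key_file, archive_key, size in db.get_sorted_keys(select_policy):
        print(archive_key, size)  # 'b.zip' comes first: least recently accessed
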
@@ -0,0 +1,111 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import logging
19 import redis
20
21 from ..lib import rc_cache
22 from ..lib.ext_json import json
23
24
25 log = logging.getLogger(__name__)
26
27 redis_client = None
28
29
30 class RedisTxnClient:
31
32 def __init__(self, url):
33 self.url = url
34 self._create_client(url)
35
36 def _create_client(self, url):
37 connection_pool = redis.ConnectionPool.from_url(url)
38 self.writer_client = redis.StrictRedis(
39 connection_pool=connection_pool
40 )
41 self.reader_client = self.writer_client
42
43 def set(self, key, value):
44 self.writer_client.set(key, value)
45
46 def get(self, key):
47 return self.reader_client.get(key)
48
49 def delete(self, key):
50 self.writer_client.delete(key)
51
52
53 def get_redis_client(url=''):
54
55 global redis_client
56 if redis_client is not None:
57 return redis_client
58 if not url:
59 from vcsserver import CONFIG
60 url = CONFIG['vcs.svn.redis_conn']
61 redis_client = RedisTxnClient(url)
62 return redis_client
63
64
65 def get_txn_id_data_key(repo_path, svn_txn_id):
66 log.debug('svn-txn-id: %s, obtaining data path', svn_txn_id)
67 repo_key = rc_cache.utils.compute_key_from_params(repo_path)
68 final_key = f'{repo_key}.{svn_txn_id}.svn_txn_id'
69 log.debug('computed final key: %s', final_key)
70
71 return final_key
72
73
74 def store_txn_id_data(repo_path, svn_txn_id, data_dict):
75 log.debug('svn-txn-id: %s, storing data', svn_txn_id)
76
77 if not svn_txn_id:
78 log.warning('Cannot store txn_id because it is empty')
79 return
80
81 redis_conn = get_redis_client()
82
83 store_key = get_txn_id_data_key(repo_path, svn_txn_id)
84 store_data = json.dumps(data_dict)
85 redis_conn.set(store_key, store_data)
86
87
88 def get_txn_id_from_store(repo_path, svn_txn_id, rm_on_read=False):
89 """
90 Reads txn_id from store and if present returns the data for callback manager
91 """
92 log.debug('svn-txn-id: %s, retrieving data', svn_txn_id)
93 redis_conn = get_redis_client()
94
95 store_key = get_txn_id_data_key(repo_path, svn_txn_id)
96 data = {}
98 raw_data = 'not-set'
99 try:
100 raw_data = redis_conn.get(store_key)
101 if not raw_data:
102 raise ValueError(f'Failed to get txn_id metadata from store: {store_key}')
103 data = json.loads(raw_data)
104 except Exception:
105 log.exception('Failed to get txn_id metadata: %s', raw_data)
106
107 if rm_on_read:
108 log.debug('Cleaning up txn_id at %s', store_key)
109 redis_conn.delete(store_key)
110
111 return data
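
A hedged end-to-end sketch of the txn-id flow; the repo path and txn id are placeholders, and it assumes vcs.svn.redis_conn is configured (or an explicit url is passed to get_redis_client):

    # hypothetical pre-commit / post-commit hook pair
    repo_path = '/repos/my-svn-repo'  # placeholder
    txn_id = '1234-abc'               # placeholder svn transaction id

    store_txn_id_data(repo_path, txn_id, {'username': 'admin'})
    data = get_txn_id_from_store(repo_path, txn_id, rm_on_read=True)
    # data == {'username': 'admin'}; the redis key is deleted after the read
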
@@ -1,5 +1,5 b''
1 1 [bumpversion]
2 current_version = 5.0.3
2 current_version = 5.1.0
3 3 message = release: Bump version {current_version} to {new_version}
4 4
5 5 [bumpversion:file:vcsserver/VERSION]
@@ -1,139 +1,144 b''
1 1 # required for pushd to work..
2 2 SHELL = /bin/bash
3 3
4 4
5 5 # set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
6 6 OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
7 7
8 8 .PHONY: clean
9 9 ## Cleanup compiled and cache py files
10 10 clean:
11 11 make test-clean
12 12 find . -type f \( -iname '*.c' -o -iname '*.pyc' -o -iname '*.so' -o -iname '*.orig' \) -exec rm '{}' ';'
13 13 find . -type d -name "build" -prune -exec rm -rf '{}' ';'
14 14
15 15
16 16 .PHONY: test
17 17 ## run test-clean and tests
18 18 test:
19 19 make test-clean
20 20 make test-only
21 21
22 22
23 23 .PHONY: test-clean
24 24 ## run test-clean and tests
25 25 test-clean:
26 26 rm -rf coverage.xml htmlcov junit.xml pylint.log result
27 27 find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
28 28 find . -type f \( -iname '.coverage.*' \) -exec rm '{}' ';'
29 29
30 30
31 31 .PHONY: test-only
32 32 ## Run tests only without cleanup
33 33 test-only:
34 34 PYTHONHASHSEED=random \
35 35 py.test -x -vv -r xw -p no:sugar \
36 36 --cov-report=term-missing --cov-report=html \
37 37 --cov=vcsserver vcsserver
38 38
39 39
40 40 .PHONY: ruff-check
41 41 ## run a ruff analysis
42 42 ruff-check:
43 43 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
44 44
45
46 45 .PHONY: pip-packages
47 46 ## Show outdated packages
48 47 pip-packages:
49 48 python ${OUTDATED_PACKAGES}
50 49
51 50
52 51 .PHONY: build
53 52 ## Build sdist/egg
54 53 build:
55 54 python -m build
56 55
57 56
58 57 .PHONY: dev-sh
59 58 ## make dev-sh
60 59 dev-sh:
61 60 sudo echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" | sudo tee -a "/etc/apt/sources.list.d/fury.list"
62 61 sudo apt-get update
63 62 sudo apt-get install -y zsh carapace-bin
64 63 rm -rf /home/rhodecode/.oh-my-zsh
65 64 curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
66 echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
67 PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
65 @echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
66 @echo "${RC_DEV_CMD_HELP}"
67 @PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
68
69
70 .PHONY: dev-cleanup
71 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
72 dev-cleanup:
73 pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
74 rm -rf /tmp/*
68 75
69 76
70 77 .PHONY: dev-env
71 78 ## make dev-env based on the requirements files and install develop of packages
72 79 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
73 80 dev-env:
81 sudo -u root chown rhodecode:rhodecode /home/rhodecode/.cache/pip/
74 82 pip install build virtualenv
75 83 pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
76 84 pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
77 85 pip install -e .
78 86
79 87
80 88 .PHONY: sh
81 89 ## shortcut for make dev-sh dev-env
82 90 sh:
83 91 make dev-env
84 92 make dev-sh
85 93
86 94
87 .PHONY: dev-srv
88 ## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
89 dev-srv:
90 pserve --reload .dev/dev.ini
95 ## Allows changing the number of workers, e.g. make dev-srv workers=2
96 workers?=1
91 97
92
93 .PHONY: dev-srv-g
94 ## run gunicorn multi process workers
95 dev-srv-g:
96 gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
98 .PHONY: dev-srv
99 ## run gunicorn web server with reloader, use workers=N to set multiworker mode
100 dev-srv:
101 gunicorn --paste=.dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py --reload --workers=$(workers)
97 102
98 103
99 104 # Default command on calling make
100 105 .DEFAULT_GOAL := show-help
101 106
102 107 .PHONY: show-help
103 108 show-help:
104 109 @echo "$$(tput bold)Available rules:$$(tput sgr0)"
105 110 @echo
106 111 @sed -n -e "/^## / { \
107 112 h; \
108 113 s/.*//; \
109 114 :doc" \
110 115 -e "H; \
111 116 n; \
112 117 s/^## //; \
113 118 t doc" \
114 119 -e "s/:.*//; \
115 120 G; \
116 121 s/\\n## /---/; \
117 122 s/\\n/ /g; \
118 123 p; \
119 124 }" ${MAKEFILE_LIST} \
120 125 | LC_ALL='C' sort --ignore-case \
121 126 | awk -F '---' \
122 127 -v ncol=$$(tput cols) \
123 128 -v indent=19 \
124 129 -v col_on="$$(tput setaf 6)" \
125 130 -v col_off="$$(tput sgr0)" \
126 131 '{ \
127 132 printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
128 133 n = split($$2, words, " "); \
129 134 line_length = ncol - indent; \
130 135 for (i = 1; i <= n; i++) { \
131 136 line_length -= length(words[i]) + 1; \
132 137 if (line_length <= 0) { \
133 138 line_length = ncol - indent - length(words[i]) - 1; \
134 139 printf "\n%*s ", -indent, " "; \
135 140 } \
136 141 printf "%s ", words[i]; \
137 142 } \
138 143 printf "\n"; \
139 144 }'
@@ -1,204 +1,191 b''
1 #
2 1
3 2 ; #################################
4 3 ; RHODECODE VCSSERVER CONFIGURATION
5 4 ; #################################
6 5
7 6 [server:main]
8 ; COMMON HOST/IP CONFIG
7 ; COMMON HOST/IP CONFIG, this applies mostly to the develop setup;
8 ; host/port for gunicorn is controlled by gunicorn_conf.py
9 9 host = 0.0.0.0
10 10 port = 10010
11 11
12 ; ##################################################
13 ; WAITRESS WSGI SERVER - Recommended for Development
14 ; ##################################################
15
16 ; use server type
17 use = egg:waitress#main
18
19 ; number of worker threads
20 threads = 5
21
22 ; MAX BODY SIZE 100GB
23 max_request_body_size = 107374182400
24
25 ; Use poll instead of select, fixes file descriptors limits problems.
26 ; May not work on old windows systems.
27 asyncore_use_poll = true
28
29 12
30 13 ; ###########################
31 14 ; GUNICORN APPLICATION SERVER
32 15 ; ###########################
33 16
34 ; run with gunicorn --paste rhodecode.ini
17 ; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
35 18
36 19 ; Module to use, this setting shouldn't be changed
37 #use = egg:gunicorn#main
20 use = egg:gunicorn#main
38 21
39 22 [app:main]
40 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
41 24 ; of this file
42 25 ; Each option in the app:main can be overridden by an environment variable
43 26 ;
44 27 ;To override an option:
45 28 ;
46 29 ;RC_<KeyName>
47 30 ;Everything should be uppercase, . and - should be replaced by _.
48 31 ;For example, if you have these configuration settings:
49 32 ;rc_cache.repo_object.backend = foo
50 33 ;can be overridden by
51 34 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
52 35
53 36 use = egg:rhodecode-vcsserver
54 37
55 38
56 39 ; #############
57 40 ; DEBUG OPTIONS
58 41 ; #############
59 42
60 43 # During development we want to have the debug toolbar enabled
61 44 pyramid.includes =
62 45 pyramid_debugtoolbar
63 46
64 47 debugtoolbar.hosts = 0.0.0.0/0
65 48 debugtoolbar.exclude_prefixes =
66 49 /css
67 50 /fonts
68 51 /images
69 52 /js
70 53
71 54 ; #################
72 55 ; END DEBUG OPTIONS
73 56 ; #################
74 57
75 58 ; Pyramid default locales, we need this to be set
76 59 #pyramid.default_locale_name = en
77 60
78 61 ; default locale used by VCS systems
79 62 #locale = en_US.UTF-8
80 63
81 ; path to binaries for vcsserver, it should be set by the installer
64 ; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
82 65 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
83 ; it can also be a path to nix-build output in case of development
84 core.binary_dir = ""
66 ; or /usr/local/bin/rhodecode_bin/vcs_bin
67 core.binary_dir =
68
69 ; Redis connection settings for svn integrations logic
70 ; This connection string needs to be the same on ce and vcsserver
71 vcs.svn.redis_conn = redis://redis:6379/0
85 72
86 73 ; Custom exception store path, defaults to TMPDIR
87 74 ; This is used to store exception from RhodeCode in shared directory
88 75 #exception_tracker.store_path =
89 76
90 77 ; #############
91 78 ; DOGPILE CACHE
92 79 ; #############
93 80
94 81 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
95 82 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
96 83 #cache_dir = %(here)s/data
97 84
98 85 ; ***************************************
99 86 ; `repo_object` cache, default file based
100 87 ; ***************************************
101 88
102 89 ; `repo_object` cache settings for vcs methods for repositories
103 90 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
104 91
105 92 ; cache auto-expires after N seconds
106 93 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
107 94 #rc_cache.repo_object.expiration_time = 2592000
108 95
109 96 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
110 97 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
111 98
112 99 ; ***********************************************************
113 100 ; `repo_object` cache with redis backend
114 101 ; recommended for larger instances, and for better performance
115 102 ; ***********************************************************
116 103
117 104 ; `repo_object` cache settings for vcs methods for repositories
118 105 #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
119 106
120 107 ; cache auto-expires after N seconds
121 108 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
122 109 #rc_cache.repo_object.expiration_time = 2592000
123 110
124 111 ; redis_expiration_time needs to be greater than expiration_time
125 112 #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
126 113
127 114 #rc_cache.repo_object.arguments.host = localhost
128 115 #rc_cache.repo_object.arguments.port = 6379
129 116 #rc_cache.repo_object.arguments.db = 5
130 117 #rc_cache.repo_object.arguments.socket_timeout = 30
131 118 ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
132 119 #rc_cache.repo_object.arguments.distributed_lock = true
133 120
134 121 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
135 122 #rc_cache.repo_object.arguments.lock_auto_renewal = true
136 123
137 124 ; Statsd client config, this is used to send metrics to statsd
138 125 ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
139 126 #statsd.enabled = false
140 127 #statsd.statsd_host = 0.0.0.0
141 128 #statsd.statsd_port = 8125
142 129 #statsd.statsd_prefix =
143 130 #statsd.statsd_ipv6 = false
144 131
145 132 ; configure logging automatically at server startup; set to false
146 133 ; to use the custom logging config below.
147 134 ; RC_LOGGING_FORMATTER
148 135 ; RC_LOGGING_LEVEL
149 136 ; env variables can control the settings for logging in case of autoconfigure
150 137
151 138 #logging.autoconfigure = true
152 139
153 140 ; specify your own custom logging config file to configure logging
154 141 #logging.logging_conf_file = /path/to/custom_logging.ini
155 142
156 143 ; #####################
157 144 ; LOGGING CONFIGURATION
158 145 ; #####################
159 146
160 147 [loggers]
161 148 keys = root, vcsserver
162 149
163 150 [handlers]
164 151 keys = console
165 152
166 153 [formatters]
167 154 keys = generic, json
168 155
169 156 ; #######
170 157 ; LOGGERS
171 158 ; #######
172 159 [logger_root]
173 160 level = NOTSET
174 161 handlers = console
175 162
176 163 [logger_vcsserver]
177 164 level = DEBUG
178 165 handlers =
179 166 qualname = vcsserver
180 167 propagate = 1
181 168
182 169 ; ########
183 170 ; HANDLERS
184 171 ; ########
185 172
186 173 [handler_console]
187 174 class = StreamHandler
188 175 args = (sys.stderr, )
189 176 level = DEBUG
190 177 ; To enable JSON formatted logs replace 'generic' with 'json'
191 178 ; This allows sending properly formatted logs to grafana loki or elasticsearch
192 179 formatter = generic
193 180
194 181 ; ##########
195 182 ; FORMATTERS
196 183 ; ##########
197 184
198 185 [formatter_generic]
199 186 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
200 187 datefmt = %Y-%m-%d %H:%M:%S
201 188
202 189 [formatter_json]
203 190 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
204 191 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,167 +1,171 b''
1 #
2 1
3 2 ; #################################
4 3 ; RHODECODE VCSSERVER CONFIGURATION
5 4 ; #################################
6 5
7 6 [server:main]
8 ; COMMON HOST/IP CONFIG
9 host = 127.0.0.1
7 ; COMMON HOST/IP CONFIG, this applies mostly to the develop setup;
8 ; host/port for gunicorn is controlled by gunicorn_conf.py
9 host = 0.0.0.0
10 10 port = 10010
11 11
12 12
13 13 ; ###########################
14 14 ; GUNICORN APPLICATION SERVER
15 15 ; ###########################
16 16
17 ; run with gunicorn --paste rhodecode.ini
17 ; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
18 18
19 19 ; Module to use, this setting shouldn't be changed
20 20 use = egg:gunicorn#main
21 21
22 22 [app:main]
23 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
24 24 ; of this file
25 25 ; Each option in the app:main can be overridden by an environment variable
26 26 ;
27 27 ;To override an option:
28 28 ;
29 29 ;RC_<KeyName>
30 30 ;Everything should be uppercase, . and - should be replaced by _.
31 31 ;For example, if you have these configuration settings:
32 32 ;rc_cache.repo_object.backend = foo
33 33 ;can be overridden by
34 34 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
35 35
36 36 use = egg:rhodecode-vcsserver
37 37
38 38 ; Pyramid default locales, we need this to be set
39 39 #pyramid.default_locale_name = en
40 40
41 41 ; default locale used by VCS systems
42 42 #locale = en_US.UTF-8
43 43
44 ; path to binaries for vcsserver, it should be set by the installer
44 ; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
45 45 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
46 ; it can also be a path to nix-build output in case of development
47 core.binary_dir = ""
46 ; or /usr/local/bin/rhodecode_bin/vcs_bin
47 core.binary_dir =
48
49 ; Redis connection settings for svn integrations logic
50 ; This connection string needs to be the same on ce and vcsserver
51 vcs.svn.redis_conn = redis://redis:6379/0
48 52
49 53 ; Custom exception store path, defaults to TMPDIR
50 54 ; This is used to store exception from RhodeCode in shared directory
51 55 #exception_tracker.store_path =
52 56
53 57 ; #############
54 58 ; DOGPILE CACHE
55 59 ; #############
56 60
57 61 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
58 62 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
59 63 #cache_dir = %(here)s/data
60 64
61 65 ; ***************************************
62 66 ; `repo_object` cache, default file based
63 67 ; ***************************************
64 68
65 69 ; `repo_object` cache settings for vcs methods for repositories
66 70 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
67 71
68 72 ; cache auto-expires after N seconds
69 73 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
70 74 #rc_cache.repo_object.expiration_time = 2592000
71 75
72 76 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
73 77 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
74 78
75 79 ; ***********************************************************
76 80 ; `repo_object` cache with redis backend
77 81 ; recommended for larger instances, and for better performance
78 82 ; ***********************************************************
79 83
80 84 ; `repo_object` cache settings for vcs methods for repositories
81 85 #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
82 86
83 87 ; cache auto-expires after N seconds
84 88 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
85 89 #rc_cache.repo_object.expiration_time = 2592000
86 90
87 91 ; redis_expiration_time needs to be greater than expiration_time
88 92 #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
89 93
90 94 #rc_cache.repo_object.arguments.host = localhost
91 95 #rc_cache.repo_object.arguments.port = 6379
92 96 #rc_cache.repo_object.arguments.db = 5
93 97 #rc_cache.repo_object.arguments.socket_timeout = 30
94 98 ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
95 99 #rc_cache.repo_object.arguments.distributed_lock = true
96 100
97 101 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
98 102 #rc_cache.repo_object.arguments.lock_auto_renewal = true
99 103
100 104 ; Statsd client config, this is used to send metrics to statsd
101 105 ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
102 106 #statsd.enabled = false
103 107 #statsd.statsd_host = 0.0.0.0
104 108 #statsd.statsd_port = 8125
105 109 #statsd.statsd_prefix =
106 110 #statsd.statsd_ipv6 = false
107 111
108 112 ; configure logging automatically at server startup; set to false
109 113 ; to use the custom logging config below.
110 114 ; RC_LOGGING_FORMATTER
111 115 ; RC_LOGGING_LEVEL
112 116 ; env variables can control the settings for logging in case of autoconfigure
113 117
114 118 #logging.autoconfigure = true
115 119
116 120 ; specify your own custom logging config file to configure logging
117 121 #logging.logging_conf_file = /path/to/custom_logging.ini
118 122
119 123 ; #####################
120 124 ; LOGGING CONFIGURATION
121 125 ; #####################
122 126
123 127 [loggers]
124 128 keys = root, vcsserver
125 129
126 130 [handlers]
127 131 keys = console
128 132
129 133 [formatters]
130 134 keys = generic, json
131 135
132 136 ; #######
133 137 ; LOGGERS
134 138 ; #######
135 139 [logger_root]
136 140 level = NOTSET
137 141 handlers = console
138 142
139 143 [logger_vcsserver]
140 144 level = INFO
141 145 handlers =
142 146 qualname = vcsserver
143 147 propagate = 1
144 148
145 149 ; ########
146 150 ; HANDLERS
147 151 ; ########
148 152
149 153 [handler_console]
150 154 class = StreamHandler
151 155 args = (sys.stderr, )
152 156 level = INFO
153 157 ; To enable JSON formatted logs replace 'generic' with 'json'
154 158 ; This allows sending properly formatted logs to grafana loki or elasticsearch
155 159 formatter = generic
156 160
157 161 ; ##########
158 162 ; FORMATTERS
159 163 ; ##########
160 164
161 165 [formatter_generic]
162 166 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
163 167 datefmt = %Y-%m-%d %H:%M:%S
164 168
165 169 [formatter_json]
166 170 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
167 171 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,77 +1,102 b''
1 1 # deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]'
2 2
3 3 async-timeout==4.0.3
4 4 atomicwrites==1.4.1
5 5 celery==5.3.6
6 6 billiard==4.2.0
7 7 click==8.1.3
8 8 click-didyoumean==0.3.0
9 9 click==8.1.3
10 10 click-plugins==1.1.1
11 11 click==8.1.3
12 12 click-repl==0.2.0
13 13 click==8.1.3
14 14 prompt-toolkit==3.0.38
15 15 wcwidth==0.2.6
16 16 six==1.16.0
17 17 kombu==5.3.5
18 18 amqp==5.2.0
19 19 vine==5.1.0
20 20 vine==5.1.0
21 21 python-dateutil==2.8.2
22 22 six==1.16.0
23 tzdata==2023.4
23 tzdata==2024.1
24 24 vine==5.1.0
25 25 contextlib2==21.6.0
26 cov-core==1.15.0
27 coverage==7.2.3
28 diskcache==5.6.3
29 dogpile.cache==1.3.0
26 dogpile.cache==1.3.3
30 27 decorator==5.1.1
31 28 stevedore==5.1.0
32 29 pbr==5.11.1
33 30 dulwich==0.21.6
34 31 urllib3==1.26.14
32 fsspec==2024.6.0
35 33 gunicorn==21.2.0
36 packaging==23.1
37 hg-evolve==11.0.2
34 packaging==24.0
35 hg-evolve==11.1.3
38 36 importlib-metadata==6.0.0
39 37 zipp==3.15.0
40 mercurial==6.3.3
41 mock==5.0.2
38 mercurial==6.7.4
42 39 more-itertools==9.1.0
43 msgpack==1.0.7
44 orjson==3.9.13
40 msgpack==1.0.8
41 orjson==3.10.3
45 42 psutil==5.9.8
46 43 py==1.11.0
47 44 pygit2==1.13.3
48 45 cffi==1.16.0
49 46 pycparser==2.21
50 47 pygments==2.15.1
51 48 pyparsing==3.1.1
52 49 pyramid==2.0.2
53 50 hupper==1.12
54 51 plaster==1.1.2
55 52 plaster-pastedeploy==1.0.1
56 53 pastedeploy==3.1.0
57 54 plaster==1.1.2
58 55 translationstring==1.4
59 56 venusian==3.0.0
60 57 webob==1.8.7
61 58 zope.deprecation==5.0.0
62 zope.interface==6.1.0
63 redis==5.0.1
59 zope.interface==6.3.0
60 redis==5.0.4
64 61 async-timeout==4.0.3
65 62 repoze.lru==0.7
63 s3fs==2024.6.0
64 aiobotocore==2.13.0
65 aiohttp==3.9.5
66 aiosignal==1.3.1
67 frozenlist==1.4.1
68 attrs==22.2.0
69 frozenlist==1.4.1
70 multidict==6.0.5
71 yarl==1.9.4
72 idna==3.4
73 multidict==6.0.5
74 aioitertools==0.11.0
75 botocore==1.34.106
76 jmespath==1.0.1
77 python-dateutil==2.8.2
78 six==1.16.0
79 urllib3==1.26.14
80 wrapt==1.16.0
81 aiohttp==3.9.5
82 aiosignal==1.3.1
83 frozenlist==1.4.1
84 attrs==22.2.0
85 frozenlist==1.4.1
86 multidict==6.0.5
87 yarl==1.9.4
88 idna==3.4
89 multidict==6.0.5
90 fsspec==2024.6.0
66 91 scandir==1.10.0
67 92 setproctitle==1.3.3
68 93 subvertpy==0.11.0
69 94 waitress==3.0.0
70 95 wcwidth==0.2.6
71 96
72 97
73 98 ## test related requirements
74 99 #-r requirements_test.txt
75 100
76 101 ## uncomment to add the debug libraries
77 102 #-r requirements_debug.txt
@@ -1,45 +1,48 b''
1 1 # test related requirements
2
3 cov-core==1.15.0
4 coverage==7.2.3
5 mock==5.0.2
6 py==1.11.0
7 pytest-cov==4.0.0
8 coverage==7.2.3
9 pytest==7.3.1
10 attrs==22.2.0
2 mock==5.1.0
3 pytest-cov==4.1.0
4 coverage==7.4.3
5 pytest==8.1.1
11 6 iniconfig==2.0.0
12 packaging==23.1
13 pluggy==1.0.0
7 packaging==24.0
8 pluggy==1.4.0
9 pytest-env==1.1.3
10 pytest==8.1.1
11 iniconfig==2.0.0
12 packaging==24.0
13 pluggy==1.4.0
14 14 pytest-profiling==1.7.0
15 15 gprof2dot==2022.7.29
16 pytest==7.3.1
17 attrs==22.2.0
16 pytest==8.1.1
18 17 iniconfig==2.0.0
19 packaging==23.1
20 pluggy==1.0.0
18 packaging==24.0
19 pluggy==1.4.0
21 20 six==1.16.0
22 pytest-runner==6.0.0
23 pytest-sugar==0.9.7
24 packaging==23.1
25 pytest==7.3.1
26 attrs==22.2.0
21 pytest-rerunfailures==13.0
22 packaging==24.0
23 pytest==8.1.1
27 24 iniconfig==2.0.0
28 packaging==23.1
29 pluggy==1.0.0
30 termcolor==2.3.0
31 pytest-timeout==2.1.0
32 pytest==7.3.1
33 attrs==22.2.0
25 packaging==24.0
26 pluggy==1.4.0
27 pytest-runner==6.0.1
28 pytest-sugar==1.0.0
29 packaging==24.0
30 pytest==8.1.1
34 31 iniconfig==2.0.0
35 packaging==23.1
36 pluggy==1.0.0
32 packaging==24.0
33 pluggy==1.4.0
34 termcolor==2.4.0
35 pytest-timeout==2.3.1
36 pytest==8.1.1
37 iniconfig==2.0.0
38 packaging==24.0
39 pluggy==1.4.0
37 40 webtest==3.0.0
38 beautifulsoup4==4.11.2
39 soupsieve==2.4
41 beautifulsoup4==4.12.3
42 soupsieve==2.5
40 43 waitress==3.0.0
41 44 webob==1.8.7
42 45
43 46 # RhodeCode test-data
44 47 rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
45 48 rc_testdata==0.11.0
@@ -1,1 +1,1 b''
1 5.0.3 No newline at end of file
1 5.1.0 No newline at end of file
@@ -1,193 +1,187 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import os
18 18 import sys
19 19 import tempfile
20 20 import logging
21 21 import urllib.parse
22 22
23 from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
23 from vcsserver.lib.archive_cache import get_archival_cache_store
24 24
25 25 from vcsserver import exceptions
26 26 from vcsserver.exceptions import NoContentException
27 27 from vcsserver.hgcompat import archival
28 from vcsserver.str_utils import safe_bytes
28 from vcsserver.lib.str_utils import safe_bytes
29 29 from vcsserver.lib.exc_tracking import format_exc
30 30 log = logging.getLogger(__name__)
31 31
32 32
33 33 class RepoFactory:
34 34 """
35 35 Utility to create instances of repository
36 36
37 37 It provides internal caching of the `repo` object based on
38 38 the :term:`call context`.
39 39 """
40 40 repo_type = None
41 41
42 42 def __init__(self):
43 43 pass
44 44
45 45 def _create_config(self, path, config):
46 46 config = {}
47 47 return config
48 48
49 49 def _create_repo(self, wire, create):
50 50 raise NotImplementedError()
51 51
52 52 def repo(self, wire, create=False):
53 53 raise NotImplementedError()
54 54
55 55
56 56 def obfuscate_qs(query_string):
57 57 if query_string is None:
58 58 return None
59 59
60 60 parsed = []
61 61 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
62 62 if k in ['auth_token', 'api_key']:
63 63 v = "*****"
64 64 parsed.append((k, v))
65 65
66 66 return '&'.join('{}{}'.format(
67 67 k, f'={v}' if v else '') for k, v in parsed)
68 68
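A quick usage sketch with hypothetical values, showing which keys get masked and how blank values render:

# sensitive keys are replaced with '*****'; a blank value yields a bare key
assert obfuscate_qs('auth_token=secret123&foo=bar') == 'auth_token=*****&foo=bar'
assert obfuscate_qs('debug=&api_key=xyz') == 'debug&api_key=*****'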
69 69
70 70 def raise_from_original(new_type, org_exc: Exception):
71 71 """
72 72 Raise a new exception type with original args and traceback.
73 73 """
74 74 exc_info = sys.exc_info()
75 75 exc_type, exc_value, exc_traceback = exc_info
76 76 new_exc = new_type(*exc_value.args)
77 77
78 78 # store the original traceback into the new exc
79 79 new_exc._org_exc_tb = format_exc(exc_info)
80 80
81 81 try:
82 82 raise new_exc.with_traceback(exc_traceback)
83 83 finally:
84 84 del exc_traceback
85 85
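A minimal sketch of the intended call pattern, from inside an except block; RemoteError is a hypothetical target type:

class RemoteError(Exception):
    pass

try:
    int('boom')
except ValueError as org:
    # re-raised as RemoteError with the original args; the formatted
    # original traceback is attached as new_exc._org_exc_tb
    raise_from_original(RemoteError, org)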
86 86
87 87 class ArchiveNode:
88 88 def __init__(self, path, mode, is_link, raw_bytes):
89 89 self.path = path
90 90 self.mode = mode
91 91 self.is_link = is_link
92 92 self.raw_bytes = raw_bytes
93 93
94 94
95 95 def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
96 96 commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
97 97 """
98 98 Function that stores a generated archive and sends it to a dedicated backend store.
99 99 Here we use diskcache
100 100
101 101 :param node_walker: a generator returning nodes to add to archive
102 102 :param archive_key: key used to store the path
103 103 :param kind: archive kind
104 104 :param mtime: time of creation
105 105 :param archive_at_path: default '/', the path at which the archive was started.
106 106 If this is not '/' it means it's a partial archive
107 107 :param archive_dir_name: name of the inner directory when creating an archive
108 108 :param commit_id: commit sha of revision archive was created at
109 109 :param write_metadata:
110 110 :param extra_metadata:
111 111 :param cache_config:
112 112
113 113 walker should be a file walker, for example,
114 114 def node_walker():
115 115 for file_info in files:
116 116 yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
117 117 """
118 118 extra_metadata = extra_metadata or {}
119 119
120 120 d_cache = get_archival_cache_store(config=cache_config)
121 121
122 122 if archive_key in d_cache:
123 with d_cache as d_cache_reader:
124 reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
125 return reader.name
123 reader, metadata = d_cache.fetch(archive_key)
124 return reader.name
126 125
127 126 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
128 127 log.debug('Creating new temp archive in %s', archive_tmp_path)
129 128
130 129 if kind == "tgz":
131 130 archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
132 131 elif kind == "tbz2":
133 132 archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
134 133 elif kind == 'zip':
135 134 archiver = archival.zipit(archive_tmp_path, mtime)
136 135 else:
137 136 raise exceptions.ArchiveException()(
138 137 f'Remote does not support: "{kind}" archive type.')
139 138
140 139 for f in node_walker(commit_id, archive_at_path):
141 140 f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
141
142 142 try:
143 143 archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
144 144 except NoContentException:
145 145 # NOTE(marcink): this is a special case for SVN so we can create "empty"
146 146 # directories which are not supported by archiver
147 147 archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
148 148
149 metadata = {
150 'commit_id': commit_id,
151 'mtime': mtime,
152 }
153 metadata.update(extra_metadata)
149 154 if write_metadata:
150 metadata = dict([
151 ('commit_id', commit_id),
152 ('mtime', mtime),
153 ])
154 metadata.update(extra_metadata)
155
156 155 meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
157 156 f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
158 157 archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
159 158
160 159 archiver.done()
161 160
162 # ensure set & get are atomic
163 with d_cache.transact():
164
165 with open(archive_tmp_path, 'rb') as archive_file:
166 add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
167 if not add_result:
168 log.error('Failed to store cache for key=%s', archive_key)
161 with open(archive_tmp_path, 'rb') as archive_file:
162 add_result = d_cache.store(archive_key, archive_file, metadata=metadata)
163 if not add_result:
164 log.error('Failed to store cache for key=%s', archive_key)
169 165
170 os.remove(archive_tmp_path)
166 os.remove(archive_tmp_path)
171 167
172 reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
173 if not reader:
174 raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
168 reader, metadata = d_cache.fetch(archive_key)
175 169
176 return reader.name
170 return reader.name
177 171
178 172
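A minimal usage sketch of the function above, with hypothetical file data; note that the fourth ArchiveNode argument must be a zero-argument callable returning bytes, since the archiver invokes f.raw_bytes():

def node_walker(commit_id, archive_at_path):
    # `files` is a hypothetical iterable of (path, mode, is_link, data_callable)
    for f_path, mode, is_link, data_callable in files:
        yield ArchiveNode(f_path, mode, is_link, data_callable)

archive_path = store_archive_in_cache(
    node_walker, archive_key='repo-abc123.tgz', kind='tgz',
    mtime=1715000000, archive_at_path='/', archive_dir_name='repo-abc123',
    commit_id='abc123', cache_config=app_settings)  # app_settings: the app config dict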
179 173 class BinaryEnvelope:
180 174 def __init__(self, val):
181 175 self.val = val
182 176
183 177
184 178 class BytesEnvelope(bytes):
185 179 def __new__(cls, content):
186 180 if isinstance(content, bytes):
187 181 return super().__new__(cls, content)
188 182 else:
189 183 raise TypeError('BytesEnvelope content= param must be bytes. Use BinaryEnvelope to wrap other types')
190 184
191 185
192 186 class BinaryBytesEnvelope(BytesEnvelope):
193 187 pass
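A short sketch of the envelope contract:

BytesEnvelope(b'payload')        # ok, behaves like plain bytes
BinaryEnvelope({'any': 'obj'})   # ok, arbitrary value kept on .val
BytesEnvelope('text')            # raises TypeError, str is rejected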
@@ -1,185 +1,185 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import os
20 20 import textwrap
21 21 import string
22 22 import functools
23 23 import logging
24 24 import tempfile
25 25 import logging.config
26 26
27 from vcsserver.type_utils import str2bool, aslist
27 from vcsserver.lib.type_utils import str2bool, aslist
28 28
29 29 log = logging.getLogger(__name__)
30 30
31 31
32 32 # skip keys that are set here, so we don't double-process them
33 33 set_keys = {
34 34 '__file__': ''
35 35 }
36 36
37 37
38 38 class SettingsMaker:
39 39
40 40 def __init__(self, app_settings):
41 41 self.settings = app_settings
42 42
43 43 @classmethod
44 44 def _bool_func(cls, input_val):
45 45 if isinstance(input_val, bytes):
46 46 # decode to str
47 47 input_val = input_val.decode('utf8')
48 48 return str2bool(input_val)
49 49
50 50 @classmethod
51 51 def _int_func(cls, input_val):
52 52 return int(input_val)
53 53
54 54 @classmethod
55 55 def _float_func(cls, input_val):
56 56 return float(input_val)
57 57
58 58 @classmethod
59 59 def _list_func(cls, input_val, sep=','):
60 60 return aslist(input_val, sep=sep)
61 61
62 62 @classmethod
63 63 def _string_func(cls, input_val, lower=True):
64 64 if lower:
65 65 input_val = input_val.lower()
66 66 return input_val
67 67
68 68 @classmethod
69 69 def _string_no_quote_func(cls, input_val, lower=True):
70 70 """
71 71 Special case string function that detects if a value is set to an empty quoted string
72 72 e.g.
73 73
74 74 core.binary_dir = ""
75 75 """
76 76
77 77 input_val = cls._string_func(input_val, lower=lower)
78 78 if input_val in ['""', "''"]:
79 79 return ''
80 80 return input_val
81 81
82 82 @classmethod
83 83 def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
84 84
85 85 # ensure we have our dir created
86 86 if not os.path.isdir(input_val) and ensure_dir:
87 87 os.makedirs(input_val, mode=mode, exist_ok=True)
88 88
89 89 if not os.path.isdir(input_val):
90 90 raise Exception(f'Dir at {input_val} does not exist')
91 91 return input_val
92 92
93 93 @classmethod
94 94 def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
95 95 dirname = os.path.dirname(input_val)
96 96 cls._dir_func(dirname, ensure_dir=ensure_dir)
97 97 return input_val
98 98
99 99 @classmethod
100 100 def _key_transformator(cls, key):
101 101 return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
102 102
103 103 def maybe_env_key(self, key):
104 104 # maybe we have this KEY in the env; if found, that value takes higher priority.
105 105 transformed_key = self._key_transformator(key)
106 106 envvar_value = os.environ.get(transformed_key)
107 107 if envvar_value:
108 108 log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
109 109
110 110 return envvar_value
111 111
112 112 def env_expand(self):
113 113 replaced = {}
114 114 for k, v in self.settings.items():
115 115 if k not in set_keys:
116 116 envvar_value = self.maybe_env_key(k)
117 117 if envvar_value:
118 118 replaced[k] = envvar_value
119 119 set_keys[k] = envvar_value
120 120
121 121 # replace ALL updated keys
122 122 self.settings.update(replaced)
123 123
124 124 def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
125 125 """
126 126 Helper to enable debug logging on a running instance
127 127 :return:
128 128 """
129 129
130 130 if not str2bool(self.settings.get('logging.autoconfigure')):
131 131 log.info('logging configuration based on main .ini file')
132 132 return
133 133
134 134 if logging_conf is None:
135 135 logging_conf = self.settings.get('logging.logging_conf_file') or ''
136 136
137 137 if not os.path.isfile(logging_conf):
138 138 log.error('Unable to setup logging based on %s, '
139 139 'file does not exist.... specify path using logging.logging_conf_file= config setting. ', logging_conf)
140 140 return
141 141
142 142 with open(logging_conf, 'rt') as f:
143 143 ini_template = textwrap.dedent(f.read())
144 144 ini_template = string.Template(ini_template).safe_substitute(
145 145 RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
146 146 RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
147 147 )
148 148
149 149 with tempfile.NamedTemporaryFile(prefix='rc_logging_', suffix='.ini', delete=False) as f:
150 150 log.info('Saved Temporary LOGGING config at %s', f.name)
151 151 f.write(ini_template)
152 152
153 153 logging.config.fileConfig(f.name)
154 154 os.remove(f.name)
155 155
156 156 def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
157 157 input_val = self.settings.get(key, default)
158 158
159 159 if default_when_empty and not input_val:
160 160 # use default value when value is set in the config but it is empty
161 161 input_val = default
162 162
163 163 parser_func = {
164 164 'bool': self._bool_func,
165 165 'int': self._int_func,
166 166 'float': self._float_func,
167 167 'list': self._list_func,
168 168 'list:newline': functools.partial(self._list_func, sep='\n'),
169 169 'list:spacesep': functools.partial(self._list_func, sep=' '),
170 170 'string': functools.partial(self._string_func, lower=lower),
171 171 'string:noquote': functools.partial(self._string_no_quote_func, lower=lower),
172 172 'dir': self._dir_func,
173 173 'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
174 174 'file': self._file_path_func,
175 175 'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
176 176 None: lambda i: i
177 177 }[parser]
178 178
179 179 envvar_value = self.maybe_env_key(key)
180 180 if envvar_value:
181 181 input_val = envvar_value
182 182 set_keys[key] = input_val
183 183
184 184 self.settings[key] = parser_func(input_val)
185 185 return self.settings[key]
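A minimal usage sketch with hypothetical settings; an RC_-prefixed env variable derived by _key_transformator (e.g. RC_STATSD_ENABLED) would take priority over the ini value:

settings = {'statsd.enabled': 'false', 'core.binary_dir': '""'}
maker = SettingsMaker(settings)
maker.make_setting('statsd.enabled', default='false', parser='bool')         # -> False
maker.make_setting('core.binary_dir', default='', parser='string:noquote')   # '""' -> ''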
@@ -1,296 +1,296 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import re
19 19 import logging
20 20
21 21 from pyramid.config import Configurator
22 22 from pyramid.response import Response, FileIter
23 23 from pyramid.httpexceptions import (
24 24 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
25 25 HTTPUnprocessableEntity)
26 26
27 from vcsserver.lib.rc_json import json
27 from vcsserver.lib.ext_json import json
28 28 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
29 29 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
30 from vcsserver.str_utils import safe_int
30 from vcsserver.lib.str_utils import safe_int
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' # +json ?
36 36 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
37 37
38 38
39 39 def write_response_error(http_exception, text=None):
40 40 content_type = GIT_LFS_CONTENT_TYPE + '+json'
41 41 _exception = http_exception(content_type=content_type)
42 42 _exception.content_type = content_type
43 43 if text:
44 44 _exception.body = json.dumps({'message': text})
45 45 log.debug('LFS: writing response of type %s to client with text:%s',
46 46 http_exception, text)
47 47 return _exception
48 48
49 49
50 50 class AuthHeaderRequired:
51 51 """
52 52 Decorator to check if request has proper auth-header
53 53 """
54 54
55 55 def __call__(self, func):
56 56 return get_cython_compat_decorator(self.__wrapper, func)
57 57
58 58 def __wrapper(self, func, *fargs, **fkwargs):
59 59 request = fargs[1]
60 60 auth = request.authorization
61 61 if not auth:
62 62 return write_response_error(HTTPForbidden)
63 63 return func(*fargs[1:], **fkwargs)
64 64
65 65
66 66 # views
67 67
68 68 def lfs_objects(request):
69 69 # indicate not supported, V1 API
70 70 log.warning('LFS: v1 api not supported, reporting it back to client')
71 71 return write_response_error(HTTPNotImplemented, 'LFS: v1 api not supported')
72 72
73 73
74 74 @AuthHeaderRequired()
75 75 def lfs_objects_batch(request):
76 76 """
77 77 The client sends the following information to the Batch endpoint to transfer some objects:
78 78
79 79 operation - Should be download or upload.
80 80 transfers - An optional Array of String identifiers for transfer
81 81 adapters that the client has configured. If omitted, the basic
82 82 transfer adapter MUST be assumed by the server.
83 83 objects - An Array of objects to download.
84 84 oid - String OID of the LFS object.
85 85 size - Integer byte size of the LFS object. Must be at least zero.
86 86 """
87 87 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
88 88 auth = request.authorization
89 89 repo = request.matchdict.get('repo')
90 90 data = request.json
91 91 operation = data.get('operation')
92 92 http_scheme = request.registry.git_lfs_http_scheme
93 93
94 94 if operation not in ('download', 'upload'):
95 95 log.debug('LFS: unsupported operation:%s', operation)
96 96 return write_response_error(
97 97 HTTPBadRequest, f'unsupported operation mode: `{operation}`')
98 98
99 99 if 'objects' not in data:
100 100 log.debug('LFS: missing objects data')
101 101 return write_response_error(
102 102 HTTPBadRequest, 'missing objects data')
103 103
104 104 log.debug('LFS: handling operation of type: %s', operation)
105 105
106 106 objects = []
107 107 for o in data['objects']:
108 108 try:
109 109 oid = o['oid']
110 110 obj_size = o['size']
111 111 except KeyError:
112 112 log.exception('LFS, failed to extract data')
113 113 return write_response_error(
114 114 HTTPBadRequest, 'unsupported data in objects')
115 115
116 116 obj_data = {'oid': oid}
117 117 if http_scheme == 'http':
118 118 # Note(marcink): when using http, we might have a custom port,
119 119 # so we skip setting the scheme; url dispatch then won't generate a port in the URL.
120 120 # We need this for development.
121 121 http_scheme = None
122 122
123 123 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
124 124 _scheme=http_scheme)
125 125 obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
126 126 _scheme=http_scheme)
127 127 store = LFSOidStore(
128 128 oid, repo, store_location=request.registry.git_lfs_store_path)
129 129 handler = OidHandler(
130 130 store, repo, auth, oid, obj_size, obj_data,
131 131 obj_href, obj_verify_href)
132 132
133 133 # this verifies also OIDs
134 134 actions, errors = handler.exec_operation(operation)
135 135 if errors:
136 136 log.warning('LFS: got following errors: %s', errors)
137 137 obj_data['errors'] = errors
138 138
139 139 if actions:
140 140 obj_data['actions'] = actions
141 141
142 142 obj_data['size'] = obj_size
143 143 obj_data['authenticated'] = True
144 144 objects.append(obj_data)
145 145
146 146 result = {'objects': objects, 'transfer': 'basic'}
147 147 log.debug('LFS Response %s', safe_result(result))
148 148
149 149 return result
150 150
151 151
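For illustration, a batch exchange looks roughly like this (oid/size values are hypothetical; compare the tests later in this diff):

request_payload = {
    'operation': 'download',
    'objects': [{'oid': '456', 'size': '1024'}],
}
# for an oid present in the store, the response carries a download action:
# {'transfer': 'basic',
#  'objects': [{'oid': '456', 'size': '1024', 'authenticated': True,
#               'actions': {'download': {
#                   'header': {'Authorization': 'Basic ...'},
#                   'href': 'http://localhost/repo/info/lfs/objects/456'}}}]}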
152 152 def lfs_objects_oid_upload(request):
153 153 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
154 154 repo = request.matchdict.get('repo')
155 155 oid = request.matchdict.get('oid')
156 156 store = LFSOidStore(
157 157 oid, repo, store_location=request.registry.git_lfs_store_path)
158 158 engine = store.get_engine(mode='wb')
159 159 log.debug('LFS: starting chunked write of LFS oid: %s to storage', oid)
160 160
161 161 body = request.environ['wsgi.input']
162 162
163 163 with engine as f:
164 164 blksize = 64 * 1024 # 64kb
165 165 while True:
166 166 # read in chunks as the stream comes in from Gunicorn;
167 167 # this relies on Gunicorn-specific behaviour
168 168 # and might work differently on waitress
169 169 chunk = body.read(blksize)
170 170 if not chunk:
171 171 break
172 172 f.write(chunk)
173 173
174 174 return {'upload': 'ok'}
175 175
176 176
177 177 def lfs_objects_oid_download(request):
178 178 repo = request.matchdict.get('repo')
179 179 oid = request.matchdict.get('oid')
180 180
181 181 store = LFSOidStore(
182 182 oid, repo, store_location=request.registry.git_lfs_store_path)
183 183 if not store.has_oid():
184 184 log.debug('LFS: oid %s does not exist in store', oid)
185 185 return write_response_error(
186 186 HTTPNotFound, f'requested file with oid `{oid}` not found in store')
187 187
188 188 # TODO(marcink): support range header ?
189 189 # Range: bytes=0-, `bytes=(\d+)\-.*`
190 190
191 191 f = open(store.oid_path, 'rb')
192 192 response = Response(
193 193 content_type='application/octet-stream', app_iter=FileIter(f))
194 194 response.headers.add('X-RC-LFS-Response-Oid', str(oid))
195 195 return response
196 196
197 197
198 198 def lfs_objects_verify(request):
199 199 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
200 200 repo = request.matchdict.get('repo')
201 201
202 202 data = request.json
203 203 oid = data.get('oid')
204 204 size = safe_int(data.get('size'))
205 205
206 206 if not (oid and size):
207 207 return write_response_error(
208 208 HTTPBadRequest, 'missing oid and size in request data')
209 209
210 210 store = LFSOidStore(
211 211 oid, repo, store_location=request.registry.git_lfs_store_path)
212 212 if not store.has_oid():
213 213 log.debug('LFS: oid %s does not exist in store', oid)
214 214 return write_response_error(
215 215 HTTPNotFound, f'oid `{oid}` does not exist in store')
216 216
217 217 store_size = store.size_oid()
218 218 if store_size != size:
219 219 msg = 'requested file size mismatch store size:{} requested:{}'.format(
220 220 store_size, size)
221 221 return write_response_error(
222 222 HTTPUnprocessableEntity, msg)
223 223
224 224 return {'message': {'size': 'ok', 'in_store': 'ok'}}
225 225
226 226
227 227 def lfs_objects_lock(request):
228 228 return write_response_error(
229 229 HTTPNotImplemented, 'GIT LFS locking api not supported')
230 230
231 231
232 232 def not_found(request):
233 233 return write_response_error(
234 234 HTTPNotFound, 'request path not found')
235 235
236 236
237 237 def lfs_disabled(request):
238 238 return write_response_error(
239 239 HTTPNotImplemented, 'GIT LFS disabled for this repo')
240 240
241 241
242 242 def git_lfs_app(config):
243 243
244 244 # v1 API deprecation endpoint
245 245 config.add_route('lfs_objects',
246 246 '/{repo:.*?[^/]}/info/lfs/objects')
247 247 config.add_view(lfs_objects, route_name='lfs_objects',
248 248 request_method='POST', renderer='json')
249 249
250 250 # locking API
251 251 config.add_route('lfs_objects_lock',
252 252 '/{repo:.*?[^/]}/info/lfs/locks')
253 253 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock',
254 254 request_method=('POST', 'GET'), renderer='json')
255 255
256 256 config.add_route('lfs_objects_lock_verify',
257 257 '/{repo:.*?[^/]}/info/lfs/locks/verify')
258 258 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock_verify',
259 259 request_method=('POST', 'GET'), renderer='json')
260 260
261 261 # batch API
262 262 config.add_route('lfs_objects_batch',
263 263 '/{repo:.*?[^/]}/info/lfs/objects/batch')
264 264 config.add_view(lfs_objects_batch, route_name='lfs_objects_batch',
265 265 request_method='POST', renderer='json')
266 266
267 267 # oid upload/download API
268 268 config.add_route('lfs_objects_oid',
269 269 '/{repo:.*?[^/]}/info/lfs/objects/{oid}')
270 270 config.add_view(lfs_objects_oid_upload, route_name='lfs_objects_oid',
271 271 request_method='PUT', renderer='json')
272 272 config.add_view(lfs_objects_oid_download, route_name='lfs_objects_oid',
273 273 request_method='GET', renderer='json')
274 274
275 275 # verification API
276 276 config.add_route('lfs_objects_verify',
277 277 '/{repo:.*?[^/]}/info/lfs/verify')
278 278 config.add_view(lfs_objects_verify, route_name='lfs_objects_verify',
279 279 request_method='POST', renderer='json')
280 280
281 281 # not found handler for API
282 282 config.add_notfound_view(not_found, renderer='json')
283 283
284 284
285 285 def create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
286 286 config = Configurator()
287 287 if git_lfs_enabled:
288 288 config.include(git_lfs_app)
289 289 config.registry.git_lfs_store_path = git_lfs_store_path
290 290 config.registry.git_lfs_http_scheme = git_lfs_http_scheme
291 291 else:
292 292 # not found handler for API, reporting disabled LFS support
293 293 config.add_notfound_view(lfs_disabled, renderer='json')
294 294
295 295 app = config.make_wsgi_app()
296 296 return app
@@ -1,274 +1,274 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 20 from webtest.app import TestApp as WebObTestApp
21 21
22 from vcsserver.lib.rc_json import json
23 from vcsserver.str_utils import safe_bytes
22 from vcsserver.lib.ext_json import json
23 from vcsserver.lib.str_utils import safe_bytes
24 24 from vcsserver.git_lfs.app import create_app
25 25 from vcsserver.git_lfs.lib import LFSOidStore
26 26
27 27
28 28 @pytest.fixture(scope='function')
29 29 def git_lfs_app(tmpdir):
30 30 custom_app = WebObTestApp(create_app(
31 31 git_lfs_enabled=True, git_lfs_store_path=str(tmpdir),
32 32 git_lfs_http_scheme='http'))
33 33 custom_app._store = str(tmpdir)
34 34 return custom_app
35 35
36 36
37 37 @pytest.fixture(scope='function')
38 38 def git_lfs_https_app(tmpdir):
39 39 custom_app = WebObTestApp(create_app(
40 40 git_lfs_enabled=True, git_lfs_store_path=str(tmpdir),
41 41 git_lfs_http_scheme='https'))
42 42 custom_app._store = str(tmpdir)
43 43 return custom_app
44 44
45 45
46 46 @pytest.fixture()
47 47 def http_auth():
48 48 return {'HTTP_AUTHORIZATION': "Basic XXXXX"}
49 49
50 50
51 51 class TestLFSApplication:
52 52
53 53 def test_app_wrong_path(self, git_lfs_app):
54 54 git_lfs_app.get('/repo/info/lfs/xxx', status=404)
55 55
56 56 def test_app_deprecated_endpoint(self, git_lfs_app):
57 57 response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
58 58 assert response.status_code == 501
59 59 assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}
60 60
61 61 def test_app_lock_verify_api_not_available(self, git_lfs_app):
62 62 response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
63 63 assert response.status_code == 501
64 64 assert json.loads(response.text) == {
65 65 'message': 'GIT LFS locking api not supported'}
66 66
67 67 def test_app_lock_api_not_available(self, git_lfs_app):
68 68 response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
69 69 assert response.status_code == 501
70 70 assert json.loads(response.text) == {
71 71 'message': 'GIT LFS locking api not supported'}
72 72
73 73 def test_app_batch_api_missing_auth(self, git_lfs_app):
74 74 git_lfs_app.post_json(
75 75 '/repo/info/lfs/objects/batch', params={}, status=403)
76 76
77 77 def test_app_batch_api_unsupported_operation(self, git_lfs_app, http_auth):
78 78 response = git_lfs_app.post_json(
79 79 '/repo/info/lfs/objects/batch', params={}, status=400,
80 80 extra_environ=http_auth)
81 81 assert json.loads(response.text) == {
82 82 'message': 'unsupported operation mode: `None`'}
83 83
84 84 def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
85 85 response = git_lfs_app.post_json(
86 86 '/repo/info/lfs/objects/batch', params={'operation': 'download'},
87 87 status=400, extra_environ=http_auth)
88 88 assert json.loads(response.text) == {
89 89 'message': 'missing objects data'}
90 90
91 91 def test_app_batch_api_unsupported_data_in_objects(
92 92 self, git_lfs_app, http_auth):
93 93 params = {'operation': 'download',
94 94 'objects': [{}]}
95 95 response = git_lfs_app.post_json(
96 96 '/repo/info/lfs/objects/batch', params=params, status=400,
97 97 extra_environ=http_auth)
98 98 assert json.loads(response.text) == {
99 99 'message': 'unsupported data in objects'}
100 100
101 101 def test_app_batch_api_download_missing_object(
102 102 self, git_lfs_app, http_auth):
103 103 params = {'operation': 'download',
104 104 'objects': [{'oid': '123', 'size': '1024'}]}
105 105 response = git_lfs_app.post_json(
106 106 '/repo/info/lfs/objects/batch', params=params,
107 107 extra_environ=http_auth)
108 108
109 109 expected_objects = [
110 110 {'authenticated': True,
111 111 'errors': {'error': {
112 112 'code': 404,
113 113 'message': 'object: 123 does not exist in store'}},
114 114 'oid': '123',
115 115 'size': '1024'}
116 116 ]
117 117 assert json.loads(response.text) == {
118 118 'objects': expected_objects, 'transfer': 'basic'}
119 119
120 120 def test_app_batch_api_download(self, git_lfs_app, http_auth):
121 121 oid = '456'
122 122 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
123 123 if not os.path.isdir(os.path.dirname(oid_path)):
124 124 os.makedirs(os.path.dirname(oid_path))
125 125 with open(oid_path, 'wb') as f:
126 126 f.write(safe_bytes('OID_CONTENT'))
127 127
128 128 params = {'operation': 'download',
129 129 'objects': [{'oid': oid, 'size': '1024'}]}
130 130 response = git_lfs_app.post_json(
131 131 '/repo/info/lfs/objects/batch', params=params,
132 132 extra_environ=http_auth)
133 133
134 134 expected_objects = [
135 135 {'authenticated': True,
136 136 'actions': {
137 137 'download': {
138 138 'header': {'Authorization': 'Basic XXXXX'},
139 139 'href': 'http://localhost/repo/info/lfs/objects/456'},
140 140 },
141 141 'oid': '456',
142 142 'size': '1024'}
143 143 ]
144 144 assert json.loads(response.text) == {
145 145 'objects': expected_objects, 'transfer': 'basic'}
146 146
147 147 def test_app_batch_api_upload(self, git_lfs_app, http_auth):
148 148 params = {'operation': 'upload',
149 149 'objects': [{'oid': '123', 'size': '1024'}]}
150 150 response = git_lfs_app.post_json(
151 151 '/repo/info/lfs/objects/batch', params=params,
152 152 extra_environ=http_auth)
153 153 expected_objects = [
154 154 {'authenticated': True,
155 155 'actions': {
156 156 'upload': {
157 157 'header': {'Authorization': 'Basic XXXXX',
158 158 'Transfer-Encoding': 'chunked'},
159 159 'href': 'http://localhost/repo/info/lfs/objects/123'},
160 160 'verify': {
161 161 'header': {'Authorization': 'Basic XXXXX'},
162 162 'href': 'http://localhost/repo/info/lfs/verify'}
163 163 },
164 164 'oid': '123',
165 165 'size': '1024'}
166 166 ]
167 167 assert json.loads(response.text) == {
168 168 'objects': expected_objects, 'transfer': 'basic'}
169 169
170 170 def test_app_batch_api_upload_for_https(self, git_lfs_https_app, http_auth):
171 171 params = {'operation': 'upload',
172 172 'objects': [{'oid': '123', 'size': '1024'}]}
173 173 response = git_lfs_https_app.post_json(
174 174 '/repo/info/lfs/objects/batch', params=params,
175 175 extra_environ=http_auth)
176 176 expected_objects = [
177 177 {'authenticated': True,
178 178 'actions': {
179 179 'upload': {
180 180 'header': {'Authorization': 'Basic XXXXX',
181 181 'Transfer-Encoding': 'chunked'},
182 182 'href': 'https://localhost/repo/info/lfs/objects/123'},
183 183 'verify': {
184 184 'header': {'Authorization': 'Basic XXXXX'},
185 185 'href': 'https://localhost/repo/info/lfs/verify'}
186 186 },
187 187 'oid': '123',
188 188 'size': '1024'}
189 189 ]
190 190 assert json.loads(response.text) == {
191 191 'objects': expected_objects, 'transfer': 'basic'}
192 192
193 193 def test_app_verify_api_missing_data(self, git_lfs_app):
194 194 params = {'oid': 'missing'}
195 195 response = git_lfs_app.post_json(
196 196 '/repo/info/lfs/verify', params=params,
197 197 status=400)
198 198
199 199 assert json.loads(response.text) == {
200 200 'message': 'missing oid and size in request data'}
201 201
202 202 def test_app_verify_api_missing_obj(self, git_lfs_app):
203 203 params = {'oid': 'missing', 'size': '1024'}
204 204 response = git_lfs_app.post_json(
205 205 '/repo/info/lfs/verify', params=params,
206 206 status=404)
207 207
208 208 assert json.loads(response.text) == {
209 209 'message': 'oid `missing` does not exist in store'}
210 210
211 211 def test_app_verify_api_size_mismatch(self, git_lfs_app):
212 212 oid = 'existing'
213 213 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
214 214 if not os.path.isdir(os.path.dirname(oid_path)):
215 215 os.makedirs(os.path.dirname(oid_path))
216 216 with open(oid_path, 'wb') as f:
217 217 f.write(safe_bytes('OID_CONTENT'))
218 218
219 219 params = {'oid': oid, 'size': '1024'}
220 220 response = git_lfs_app.post_json(
221 221 '/repo/info/lfs/verify', params=params, status=422)
222 222
223 223 assert json.loads(response.text) == {
224 224 'message': 'requested file size mismatch '
225 225 'store size:11 requested:1024'}
226 226
227 227 def test_app_verify_api(self, git_lfs_app):
228 228 oid = 'existing'
229 229 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
230 230 if not os.path.isdir(os.path.dirname(oid_path)):
231 231 os.makedirs(os.path.dirname(oid_path))
232 232 with open(oid_path, 'wb') as f:
233 233 f.write(safe_bytes('OID_CONTENT'))
234 234
235 235 params = {'oid': oid, 'size': 11}
236 236 response = git_lfs_app.post_json(
237 237 '/repo/info/lfs/verify', params=params)
238 238
239 239 assert json.loads(response.text) == {
240 240 'message': {'size': 'ok', 'in_store': 'ok'}}
241 241
242 242 def test_app_download_api_oid_not_existing(self, git_lfs_app):
243 243 oid = 'missing'
244 244
245 245 response = git_lfs_app.get(
246 246 '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)
247 247
248 248 assert json.loads(response.text) == {
249 249 'message': 'requested file with oid `missing` not found in store'}
250 250
251 251 def test_app_download_api(self, git_lfs_app):
252 252 oid = 'existing'
253 253 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
254 254 if not os.path.isdir(os.path.dirname(oid_path)):
255 255 os.makedirs(os.path.dirname(oid_path))
256 256 with open(oid_path, 'wb') as f:
257 257 f.write(safe_bytes('OID_CONTENT'))
258 258
259 259 response = git_lfs_app.get(
260 260 '/repo/info/lfs/objects/{oid}'.format(oid=oid))
261 261 assert response
262 262
263 263 def test_app_upload(self, git_lfs_app):
264 264 oid = 'uploaded'
265 265
266 266 response = git_lfs_app.put(
267 267 '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')
268 268
269 269 assert json.loads(response.text) == {'upload': 'ok'}
270 270
271 271 # verify that we actually wrote that OID
272 272 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
273 273 assert os.path.isfile(oid_path)
274 274 assert 'CONTENT' == open(oid_path).read()
@@ -1,142 +1,142 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 from vcsserver.str_utils import safe_bytes
20 from vcsserver.lib.str_utils import safe_bytes
21 21 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
22 22
23 23
24 24 @pytest.fixture()
25 25 def lfs_store(tmpdir):
26 26 repo = 'test'
27 27 oid = '123456789'
28 28 store = LFSOidStore(oid=oid, repo=repo, store_location=str(tmpdir))
29 29 return store
30 30
31 31
32 32 @pytest.fixture()
33 33 def oid_handler(lfs_store):
34 34 store = lfs_store
35 35 repo = store.repo
36 36 oid = store.oid
37 37
38 38 oid_handler = OidHandler(
39 39 store=store, repo_name=repo, auth=('basic', 'xxxx'),
40 40 oid=oid,
41 41 obj_size='1024', obj_data={}, obj_href='http://localhost/handle_oid',
42 42 obj_verify_href='http://localhost/verify')
43 43 return oid_handler
44 44
45 45
46 46 class TestOidHandler:
47 47
48 48 @pytest.mark.parametrize('exec_action', [
49 49 'download',
50 50 'upload',
51 51 ])
52 52 def test_exec_action(self, exec_action, oid_handler):
53 53 handler = oid_handler.exec_operation(exec_action)
54 54 assert handler
55 55
56 56 def test_exec_action_undefined(self, oid_handler):
57 57 with pytest.raises(AttributeError):
58 58 oid_handler.exec_operation('wrong')
59 59
60 60 def test_download_oid_not_existing(self, oid_handler):
61 61 response, has_errors = oid_handler.exec_operation('download')
62 62
63 63 assert response is None
64 64 assert has_errors['error'] == {
65 65 'code': 404,
66 66 'message': 'object: 123456789 does not exist in store'}
67 67
68 68 def test_download_oid(self, oid_handler):
69 69 store = oid_handler.get_store()
70 70 if not os.path.isdir(os.path.dirname(store.oid_path)):
71 71 os.makedirs(os.path.dirname(store.oid_path))
72 72
73 73 with open(store.oid_path, 'wb') as f:
74 74 f.write(safe_bytes('CONTENT'))
75 75
76 76 response, has_errors = oid_handler.exec_operation('download')
77 77
78 78 assert has_errors is None
79 79 assert response['download'] == {
80 80 'header': {'Authorization': 'basic xxxx'},
81 81 'href': 'http://localhost/handle_oid'
82 82 }
83 83
84 84 def test_upload_oid_that_exists(self, oid_handler):
85 85 store = oid_handler.get_store()
86 86 if not os.path.isdir(os.path.dirname(store.oid_path)):
87 87 os.makedirs(os.path.dirname(store.oid_path))
88 88
89 89 with open(store.oid_path, 'wb') as f:
90 90 f.write(safe_bytes('CONTENT'))
91 91 oid_handler.obj_size = 7
92 92 response, has_errors = oid_handler.exec_operation('upload')
93 93 assert has_errors is None
94 94 assert response is None
95 95
96 96 def test_upload_oid_that_exists_but_has_wrong_size(self, oid_handler):
97 97 store = oid_handler.get_store()
98 98 if not os.path.isdir(os.path.dirname(store.oid_path)):
99 99 os.makedirs(os.path.dirname(store.oid_path))
100 100
101 101 with open(store.oid_path, 'wb') as f:
102 102 f.write(safe_bytes('CONTENT'))
103 103
104 104 oid_handler.obj_size = 10240
105 105 response, has_errors = oid_handler.exec_operation('upload')
106 106 assert has_errors is None
107 107 assert response['upload'] == {
108 108 'header': {'Authorization': 'basic xxxx',
109 109 'Transfer-Encoding': 'chunked'},
110 110 'href': 'http://localhost/handle_oid',
111 111 }
112 112
113 113 def test_upload_oid(self, oid_handler):
114 114 response, has_errors = oid_handler.exec_operation('upload')
115 115 assert has_errors is None
116 116 assert response['upload'] == {
117 117 'header': {'Authorization': 'basic xxxx',
118 118 'Transfer-Encoding': 'chunked'},
119 119 'href': 'http://localhost/handle_oid'
120 120 }
121 121
122 122
123 123 class TestLFSStore:
124 124 def test_write_oid(self, lfs_store):
125 125 oid_location = lfs_store.oid_path
126 126
127 127 assert not os.path.isfile(oid_location)
128 128
129 129 engine = lfs_store.get_engine(mode='wb')
130 130 with engine as f:
131 131 f.write(safe_bytes('CONTENT'))
132 132
133 133 assert os.path.isfile(oid_location)
134 134
135 135 def test_detect_has_oid(self, lfs_store):
136 136
137 137 assert lfs_store.has_oid() is False
138 138 engine = lfs_store.get_engine(mode='wb')
139 139 with engine as f:
140 140 f.write(safe_bytes('CONTENT'))
141 141
142 142 assert lfs_store.has_oid() is True
@@ -1,92 +1,92 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Mercurial libs compatibility
20 20 """
21 21
22 22 import mercurial
23 23 from mercurial import demandimport
24 24
25 25 # patch demandimport due to a bug in mercurial that always triggers
26 26 # demandimport.enable()
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28
29 29 demandimport.enable = lambda *args, **kwargs: 1
30 30
31 31 from mercurial import ui
32 32 from mercurial import patch
33 33 from mercurial import config
34 34 from mercurial import extensions
35 35 from mercurial import scmutil
36 36 from mercurial import archival
37 37 from mercurial import discovery
38 38 from mercurial import unionrepo
39 39 from mercurial import localrepo
40 40 from mercurial import merge as hg_merge
41 41 from mercurial import subrepo
42 42 from mercurial import subrepoutil
43 43 from mercurial import tags as hg_tag
44 44 from mercurial import util as hgutil
45 45 from mercurial.commands import clone, pull
46 46 from mercurial.node import nullid
47 47 from mercurial.context import memctx, memfilectx
48 48 from mercurial.error import (
49 49 LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
50 50 RequirementError, ProgrammingError)
51 51 from mercurial.hgweb import hgweb_mod
52 52 from mercurial.localrepo import instance
53 53 from mercurial.match import match, alwaysmatcher, patternmatcher
54 54 from mercurial.mdiff import diffopts
55 55 from mercurial.node import bin, hex
56 56 from mercurial.encoding import tolocal
57 57 from mercurial.discovery import findcommonoutgoing
58 58 from mercurial.hg import peer
59 from mercurial.httppeer import makepeer
59 from mercurial.httppeer import make_peer
60 60 from mercurial.utils.urlutil import url as hg_url
61 61 from mercurial.scmutil import revrange, revsymbol
62 62 from mercurial.node import nullrev
63 63 from mercurial import exchange
64 64 from hgext import largefiles
65 65
66 66 # those auth handlers are patched for a python 2.6.5 bug causing
67 67 # infinite looping when given invalid resources
68 68 from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
69 69
70 70 # hg strip is in core now
71 71 from mercurial import strip as hgext_strip
72 72
73 73
74 74 def get_ctx(repo, ref):
75 75 if not isinstance(ref, int):
76 76 ref = safe_bytes(ref)
77 77
78 78 try:
79 79 ctx = repo[ref]
80 80 return ctx
81 81 except (ProgrammingError, TypeError):
82 82 # we're unable to find the rev using a regular lookup; we fall back
83 83 # to the slower, but backward-compatible revsymbol usage
84 84 pass
85 85 except (LookupError, RepoLookupError):
86 86 # Similar case as above but only for refs that are not numeric
87 87 if isinstance(ref, int):
88 88 raise
89 89
90 90 ctx = revsymbol(repo, ref)
91 91
92 92 return ctx
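A short usage sketch; repo is a hypothetical mercurial repository object:

ctx = get_ctx(repo, 0)       # integer revs go through the regular lookup
ctx = get_ctx(repo, 'tip')   # symbolic refs may fall back to revsymbol()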
@@ -1,230 +1,238 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import re
20 20 import os
21 21 import sys
22 22 import datetime
23 23 import logging
24 24 import pkg_resources
25 25
26 26 import vcsserver
27 27 import vcsserver.settings
28 from vcsserver.str_utils import safe_bytes
28 from vcsserver.lib.str_utils import safe_bytes
29 29
30 30 log = logging.getLogger(__name__)
31 31
32 32 HOOKS_DIR_MODE = 0o755
33 33 HOOKS_FILE_MODE = 0o755
34 34
35 35
36 36 def set_permissions_if_needed(path_to_check, perms: int):
37 37 # Get current permissions
38 38 current_permissions = os.stat(path_to_check).st_mode & 0o777 # Extract permission bits
39 39
40 40 # Check if current permissions are lower than required
41 41 if current_permissions < int(perms):
42 42 # Change the permissions if they are lower than required
43 43 os.chmod(path_to_check, perms)
44 44
45 45
46 46 def get_git_hooks_path(repo_path, bare):
47 47 hooks_path = os.path.join(repo_path, 'hooks')
48 48 if not bare:
49 49 hooks_path = os.path.join(repo_path, '.git', 'hooks')
50 50
51 51 return hooks_path
52 52
53 53
54 54 def install_git_hooks(repo_path, bare, executable=None, force_create=False):
55 55 """
56 56 Creates a RhodeCode hook inside a git repository
57 57
58 58 :param repo_path: path to repository
59 59 :param bare: defines if repository is considered a bare git repo
60 60 :param executable: binary executable to put in the hooks
61 61 :param force_create: Creates even if the same name hook exists
62 62 """
63 63 executable = executable or sys.executable
64 64 hooks_path = get_git_hooks_path(repo_path, bare)
65 65
66 66 # we always call this to ensure the dir exists and has a proper mode
67 67 if not os.path.exists(hooks_path):
68 68 # If it doesn't exist, create a new directory with the specified mode
69 69 os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
70 70 # If it exists, change the directory's mode to the specified mode
71 71 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
72 72
73 73 tmpl_post = pkg_resources.resource_string(
74 74 'vcsserver', '/'.join(
75 75 ('hook_utils', 'hook_templates', 'git_post_receive.py.tmpl')))
76 76 tmpl_pre = pkg_resources.resource_string(
77 77 'vcsserver', '/'.join(
78 78 ('hook_utils', 'hook_templates', 'git_pre_receive.py.tmpl')))
79 79
80 80 path = '' # not used for now
81 81 timestamp = datetime.datetime.utcnow().isoformat()
82 82
83 83 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
84 84 log.debug('Installing git hook in repo %s', repo_path)
85 85 _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
86 86 _rhodecode_hook = check_rhodecode_hook(_hook_file)
87 87
88 88 if _rhodecode_hook or force_create:
89 89 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
90 env_expand = str([
91 ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
92 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
93 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
94 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
95 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
96 ])
90 97 try:
91 98 with open(_hook_file, 'wb') as f:
99 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
92 100 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
93 101 template = template.replace(b'_DATE_', safe_bytes(timestamp))
94 102 template = template.replace(b'_ENV_', safe_bytes(executable))
95 103 template = template.replace(b'_PATH_', safe_bytes(path))
96 104 f.write(template)
97 105 set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
98 106 except OSError:
99 107 log.exception('error writing hook file %s', _hook_file)
100 108 else:
101 109 log.debug('skipping writing hook file')
102 110
103 111 return True
104 112
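A minimal usage sketch with a hypothetical repository path; force_create also overwrites hooks that were not written by RhodeCode:

# writes pre-receive and post-receive hooks into /srv/repos/project.git/hooks
install_git_hooks('/srv/repos/project.git', bare=True, force_create=True)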
105 113
106 114 def get_svn_hooks_path(repo_path):
107 115 hooks_path = os.path.join(repo_path, 'hooks')
108 116
109 117 return hooks_path
110 118
111 119
112 120 def install_svn_hooks(repo_path, executable=None, force_create=False):
113 121 """
114 122 Creates RhodeCode hooks inside a svn repository
115 123
116 124 :param repo_path: path to repository
117 125 :param executable: binary executable to put in the hooks
118 126 :param force_create: Create even if same name hook exists
119 127 """
120 128 executable = executable or sys.executable
121 129 hooks_path = get_svn_hooks_path(repo_path)
122 130 if not os.path.isdir(hooks_path):
123 131 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
124 132
125 133 tmpl_post = pkg_resources.resource_string(
126 134 'vcsserver', '/'.join(
127 135 ('hook_utils', 'hook_templates', 'svn_post_commit_hook.py.tmpl')))
128 136 tmpl_pre = pkg_resources.resource_string(
129 137 'vcsserver', '/'.join(
130 138 ('hook_utils', 'hook_templates', 'svn_pre_commit_hook.py.tmpl')))
131 139
132 140 path = '' # not used for now
133 141 timestamp = datetime.datetime.utcnow().isoformat()
134 142
135 143 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
136 144 log.debug('Installing svn hook in repo %s', repo_path)
137 145 _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
138 146 _rhodecode_hook = check_rhodecode_hook(_hook_file)
139 147
140 148 if _rhodecode_hook or force_create:
141 149 log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
142 150
143 151 env_expand = str([
152 ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
144 153 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
145 154 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
146 155 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
147 156 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
148
149 157 ])
150 158 try:
151 159 with open(_hook_file, 'wb') as f:
160 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
152 161 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
153 162 template = template.replace(b'_DATE_', safe_bytes(timestamp))
154 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
155 163 template = template.replace(b'_ENV_', safe_bytes(executable))
156 164 template = template.replace(b'_PATH_', safe_bytes(path))
157 165
158 166 f.write(template)
159 167 os.chmod(_hook_file, 0o755)
160 168 except OSError:
161 169 log.exception('error writing hook file %s', _hook_file)
162 170 else:
163 171 log.debug('skipping writing hook file')
164 172
165 173 return True
166 174
167 175
168 176 def get_version_from_hook(hook_path):
169 177 version = b''
170 178 hook_content = read_hook_content(hook_path)
171 179 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
172 180 if matches:
173 181 try:
174 182 version = matches.groups()[0]
175 183 log.debug('got version %s from hooks.', version)
176 184 except Exception:
177 185 log.exception("Exception while reading the hook version.")
178 186 return version.replace(b"'", b"")
179 187
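A sketch of the parsing with a hypothetical hook body; the hook templates later in this diff embed a RC_HOOK_VER = '_TMPL_' line that this regex recovers (re is already imported at the top of this module):

content = b"#!/usr/bin/env python\nRC_HOOK_VER = '5.1.0'\n"
match = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', content)
assert match.groups()[0].replace(b"'", b"") == b'5.1.0'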
180 188
181 189 def check_rhodecode_hook(hook_path):
182 190 """
183 191 Check if the hook was created by RhodeCode
184 192 """
185 193 if not os.path.exists(hook_path):
186 194 return True
187 195
188 196 log.debug('hook exists, checking if it is from RhodeCode')
189 197
190 198 version = get_version_from_hook(hook_path)
191 199 if version:
192 200 return True
193 201
194 202 return False
195 203
196 204
197 205 def read_hook_content(hook_path) -> bytes:
198 206 content = b''
199 207 if os.path.isfile(hook_path):
200 208 with open(hook_path, 'rb') as f:
201 209 content = f.read()
202 210 return content
203 211
204 212
205 213 def get_git_pre_hook_version(repo_path, bare):
206 214 hooks_path = get_git_hooks_path(repo_path, bare)
207 215 _hook_file = os.path.join(hooks_path, 'pre-receive')
208 216 version = get_version_from_hook(_hook_file)
209 217 return version
210 218
211 219
212 220 def get_git_post_hook_version(repo_path, bare):
213 221 hooks_path = get_git_hooks_path(repo_path, bare)
214 222 _hook_file = os.path.join(hooks_path, 'post-receive')
215 223 version = get_version_from_hook(_hook_file)
216 224 return version
217 225
218 226
219 227 def get_svn_pre_hook_version(repo_path):
220 228 hooks_path = get_svn_hooks_path(repo_path)
221 229 _hook_file = os.path.join(hooks_path, 'pre-commit')
222 230 version = get_version_from_hook(_hook_file)
223 231 return version
224 232
225 233
226 234 def get_svn_post_hook_version(repo_path):
227 235 hooks_path = get_svn_hooks_path(repo_path)
228 236 _hook_file = os.path.join(hooks_path, 'post-commit')
229 237 version = get_version_from_hook(_hook_file)
230 238 return version
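These per-hook version getters are what an installer would consult to decide whether a repository's hooks need refreshing; a hedged sketch (the "any mismatch" policy below is an assumption, not this module's behaviour):

    def svn_hooks_outdated(repo_path, current_version: bytes) -> bool:
        # compare the version baked into both installed svn hooks against
        # the running vcsserver version; any mismatch suggests a re-install
        installed = (
            get_svn_pre_hook_version(repo_path),
            get_svn_post_hook_version(repo_path),
        )
        return any(v != current_version for v in installed)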
@@ -1,51 +1,59 b''
1 1 #!_ENV_
2
2 3 import os
3 4 import sys
4 5 path_adjust = [_PATH_]
5 6
6 7 if path_adjust:
7 8 sys.path = path_adjust
8 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
9 15 try:
10 16 from vcsserver import hooks
11 17 except ImportError:
12 18 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 19 import traceback
14 20 print(traceback.format_exc())
15 21 hooks = None
16 22
17 23
18 24 # TIMESTAMP: _DATE_
19 25 RC_HOOK_VER = '_TMPL_'
20 26
21 27
22 28 def main():
23 29 if hooks is None:
24 30 # exit with success if we cannot import vcsserver.hooks !!
25 31 # this allows simply push to this repo even without rhodecode
26 32 sys.exit(0)
27 33
28 34 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
29 35 sys.exit(0)
30 36
31 37 repo_path = os.getcwd()
32 38 push_data = sys.stdin.readlines()
33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
39
34 40 # os.environ is modified here by a subprocess call that
35 41 # runs git and later git executes this hook.
36 42 # Environ gets some additional info from rhodecode system
37 43 # like IP or username from basic-auth
44
45 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 46 try:
39 47 result = hooks.git_post_receive(repo_path, push_data, os.environ)
40 48 sys.exit(result)
41 49 except Exception as error:
42 50 # TODO: johbo: Improve handling of this special case
43 51 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 52 raise
45 53 print(f'ERROR: {error}')
46 54 sys.exit(1)
47 55 sys.exit(0)
48 56
49 57
50 58 if __name__ == '__main__':
51 59 main()
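At install time the _OS_EXPAND_ placeholder above is replaced with the stringified env_expand list built by the installer, so the generated hook contains a literal list it can iterate. An illustrative rendering (all paths are made up):

    import os

    _OS_EXPAND_RENDERED = [
        ('RC_INI_FILE', '/etc/rhodecode/vcsserver.ini'),
        ('RC_CORE_BINARY_DIR', '/usr/local/bin/rhodecode_bin/vcs_bin'),
        ('RC_GIT_EXECUTABLE', '/usr/local/bin/git'),
    ]
    # restores the env vars that mod_dav strips before the hook runs
    for env_k, env_v in _OS_EXPAND_RENDERED:
        os.environ[env_k] = env_v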
@@ -1,51 +1,59 b''
1 1 #!_ENV_
2
2 3 import os
3 4 import sys
4 5 path_adjust = [_PATH_]
5 6
6 7 if path_adjust:
7 8 sys.path = path_adjust
8 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
9 15 try:
10 16 from vcsserver import hooks
11 17 except ImportError:
12 18 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 19 import traceback
14 20 print(traceback.format_exc())
15 21 hooks = None
16 22
17 23
18 24 # TIMESTAMP: _DATE_
19 25 RC_HOOK_VER = '_TMPL_'
20 26
21 27
22 28 def main():
23 29 if hooks is None:
24 30 # exit with success if we cannot import vcsserver.hooks !!
25 31 # this allows simply push to this repo even without rhodecode
26 32 sys.exit(0)
27 33
28 34 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
29 35 sys.exit(0)
30 36
31 37 repo_path = os.getcwd()
32 38 push_data = sys.stdin.readlines()
33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
39
34 40 # os.environ is modified here by a subprocess call that
35 41 # runs git and later git executes this hook.
36 42 # Environ gets some additional info from rhodecode system
37 43 # like IP or username from basic-auth
44
45 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 46 try:
39 47 result = hooks.git_pre_receive(repo_path, push_data, os.environ)
40 48 sys.exit(result)
41 49 except Exception as error:
42 50 # TODO: johbo: Improve handling of this special case
43 51 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 52 raise
45 53 print(f'ERROR: {error}')
46 54 sys.exit(1)
47 55 sys.exit(0)
48 56
49 57
50 58 if __name__ == '__main__':
51 59 main()
@@ -1,54 +1,59 b''
1 1 #!_ENV_
2 2
3 3 import os
4 4 import sys
5 5 path_adjust = [_PATH_]
6 6
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
10 15 try:
11 16 from vcsserver import hooks
12 17 except ImportError:
13 18 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 19 import traceback
15 20 print(traceback.format_exc())
16 21 hooks = None
17 22
18 23
19 24 # TIMESTAMP: _DATE_
20 25 RC_HOOK_VER = '_TMPL_'
21 26
22 27
23 28 # special trick to pass in some information from rc to hooks
24 29 # mod_dav strips ALL env vars and we can't even access things like PATH
25 30 for env_k, env_v in _OS_EXPAND_:
26 31 os.environ[env_k] = env_v
27 32
28 33 def main():
29 34 if hooks is None:
30 35 # exit with success if we cannot import vcsserver.hooks !!
31 36 # this allows simply push to this repo even without rhodecode
32 37 sys.exit(0)
33 38
34 39 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
35 40 sys.exit(0)
36 repo_path = os.getcwd()
41 cwd_repo_path = os.getcwd()
37 42 push_data = sys.argv[1:]
38 43
39 44 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
40 45
41 46 try:
42 result = hooks.svn_post_commit(repo_path, push_data, os.environ)
47 result = hooks.svn_post_commit(cwd_repo_path, push_data, os.environ)
43 48 sys.exit(result)
44 49 except Exception as error:
45 50 # TODO: johbo: Improve handling of this special case
46 51 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
47 52 raise
48 53 print(f'ERROR: {error}')
49 54 sys.exit(1)
50 55 sys.exit(0)
51 56
52 57
53 58 if __name__ == '__main__':
54 59 main()
@@ -1,58 +1,62 b''
1 1 #!_ENV_
2 2
3 3 import os
4 4 import sys
5 5 path_adjust = [_PATH_]
6 6
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
10 15 try:
11 16 from vcsserver import hooks
12 17 except ImportError:
13 18 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 19 import traceback
15 20 print(traceback.format_exc())
16 21 hooks = None
17 22
18 23
19 24 # TIMESTAMP: _DATE_
20 25 RC_HOOK_VER = '_TMPL_'
21 26
22 27
23 28 # special trick to pass in some information from rc to hooks
24 29 # mod_dav strips ALL env vars and we can't even access things like PATH
25 30 for env_k, env_v in _OS_EXPAND_:
26 31 os.environ[env_k] = env_v
27 32
28 33 def main():
29 34 if os.environ.get('SSH_READ_ONLY') == '1':
30 35 sys.stderr.write('Only read-only access is allowed')
31 36 sys.exit(1)
32 37
33 38 if hooks is None:
34 39 # exit with success if we cannot import vcsserver.hooks !!
35 40 # this allows simply push to this repo even without rhodecode
36 41 sys.exit(0)
37 42
38 43 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
39 44 sys.exit(0)
40 repo_path = os.getcwd()
45 cwd_repo_path = os.getcwd()
41 46 push_data = sys.argv[1:]
42 47
43 48 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
44
45 49 try:
46 result = hooks.svn_pre_commit(repo_path, push_data, os.environ)
50 result = hooks.svn_pre_commit(cwd_repo_path, push_data, os.environ)
47 51 sys.exit(result)
48 52 except Exception as error:
49 53 # TODO: johbo: Improve handling of this special case
50 54 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
51 55 raise
52 56 print(f'ERROR: {error}')
53 57 sys.exit(1)
54 58 sys.exit(0)
55 59
56 60
57 61 if __name__ == '__main__':
58 62 main()
@@ -1,826 +1,822 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21 import logging
22 22 import collections
23 23 import base64
24 24 import msgpack
25 25 import dataclasses
26 26 import pygit2
27 27
28 28 import http.client
29 29 from celery import Celery
30 30
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 33
34 from vcsserver.lib.rc_json import json
35 34 from vcsserver import exceptions, subprocessio, settings
36 from vcsserver.str_utils import ascii_str, safe_str
35 from vcsserver.lib.ext_json import json
36 from vcsserver.lib.str_utils import ascii_str, safe_str
37 from vcsserver.lib.svn_txn_utils import get_txn_id_from_store
37 38 from vcsserver.remote.git_remote import Repository
38 39
39 40 celery_app = Celery('__vcsserver__')
40 41 log = logging.getLogger(__name__)
41 42
42 43
43 44 class HooksHttpClient:
44 45 proto = 'msgpack.v1'
45 46 connection = None
46 47
47 48 def __init__(self, hooks_uri):
48 49 self.hooks_uri = hooks_uri
49 50
50 51 def __repr__(self):
51 52 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
52 53
53 54 def __call__(self, method, extras):
54 55 connection = http.client.HTTPConnection(self.hooks_uri)
55 56 # binary msgpack body
56 57 headers, body = self._serialize(method, extras)
57 58 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
58 59
59 60 try:
60 61 try:
61 62 connection.request('POST', '/', body, headers)
62 63 except Exception as error:
 63 64 log.error('Hooks-call connection failed on %s, org error: %s', connection.__dict__, error)
64 65 raise
65 66
66 67 response = connection.getresponse()
67 68 try:
68 69 return msgpack.load(response)
69 70 except Exception:
70 71 response_data = response.read()
 71 72 log.exception('Failed to decode hook response data. '
72 73 'response_code:%s, raw_data:%s',
73 74 response.status, response_data)
74 75 raise
75 76 finally:
76 77 connection.close()
77 78
78 79 @classmethod
79 80 def _serialize(cls, hook_name, extras):
80 81 data = {
81 82 'method': hook_name,
82 83 'extras': extras
83 84 }
84 85 headers = {
85 86 "rc-hooks-protocol": cls.proto,
86 87 "Connection": "keep-alive"
87 88 }
88 89 return headers, msgpack.packb(data)
89 90
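A hedged usage sketch of the HTTP transport: each hook call is one msgpack-encoded POST, and the decoded response dict carries at least 'status' and 'output'. Host, port and extras content below are illustrative.

    client = HooksHttpClient('127.0.0.1:9900')
    extras = {'repository': 'some/repo', 'username': 'admin'}
    result = client('pre_push', extras)   # msgpack-decoded dict from the server
    status, output = result['status'], result['output']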
90 91
91 92 class HooksCeleryClient:
92 93 TASK_TIMEOUT = 60 # time in seconds
93 94
94 95 def __init__(self, queue, backend):
95 96 celery_app.config_from_object({
96 97 'broker_url': queue, 'result_backend': backend,
97 98 'broker_connection_retry_on_startup': True,
98 'task_serializer': 'msgpack',
99 'task_serializer': 'json',
99 100 'accept_content': ['json', 'msgpack'],
100 'result_serializer': 'msgpack',
101 'result_serializer': 'json',
101 102 'result_accept_content': ['json', 'msgpack']
102 103 })
103 104 self.celery_app = celery_app
104 105
105 106 def __call__(self, method, extras):
106 107 inquired_task = self.celery_app.signature(
107 108 f'rhodecode.lib.celerylib.tasks.{method}'
108 109 )
109 110 return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
110 111
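The celery transport resolves each hook name to a task called rhodecode.lib.celerylib.tasks.<method>, which must be registered on the RhodeCode side; a sketch with illustrative broker/backend URLs:

    celery_client = HooksCeleryClient(
        'redis://localhost:6379/0',   # broker, illustrative
        'redis://localhost:6379/1',   # result backend, illustrative
    )
    extras = {'repository': 'some/repo'}          # illustrative payload
    result = celery_client('repo_size', extras)   # blocks up to TASK_TIMEOUT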
111 112
112 113 class HooksShadowRepoClient:
113 114
114 115 def __call__(self, hook_name, extras):
115 116 return {'output': '', 'status': 0}
116 117
117 118
118 119 class RemoteMessageWriter:
119 120 """Writer base class."""
120 121 def write(self, message):
121 122 raise NotImplementedError()
122 123
123 124
124 125 class HgMessageWriter(RemoteMessageWriter):
125 126 """Writer that knows how to send messages to mercurial clients."""
126 127
127 128 def __init__(self, ui):
128 129 self.ui = ui
129 130
130 131 def write(self, message: str):
131 132 # TODO: Check why the quiet flag is set by default.
132 133 old = self.ui.quiet
133 134 self.ui.quiet = False
134 135 self.ui.status(message.encode('utf-8'))
135 136 self.ui.quiet = old
136 137
137 138
138 139 class GitMessageWriter(RemoteMessageWriter):
139 140 """Writer that knows how to send messages to git clients."""
140 141
141 142 def __init__(self, stdout=None):
142 143 self.stdout = stdout or sys.stdout
143 144
144 145 def write(self, message: str):
145 146 self.stdout.write(message)
146 147
147 148
148 149 class SvnMessageWriter(RemoteMessageWriter):
149 150 """Writer that knows how to send messages to svn clients."""
150 151
151 152 def __init__(self, stderr=None):
152 153 # SVN needs data sent to stderr for back-to-client messaging
153 154 self.stderr = stderr or sys.stderr
154 155
155 156 def write(self, message):
156 157 self.stderr.write(message)
157 158
158 159
159 160 def _handle_exception(result):
160 161 exception_class = result.get('exception')
161 162 exception_traceback = result.get('exception_traceback')
162 163 log.debug('Handling hook-call exception: %s', exception_class)
163 164
164 165 if exception_traceback:
165 166 log.error('Got traceback from remote call:%s', exception_traceback)
166 167
167 168 if exception_class == 'HTTPLockedRC':
168 169 raise exceptions.RepositoryLockedException()(*result['exception_args'])
169 170 elif exception_class == 'HTTPBranchProtected':
170 171 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
171 172 elif exception_class == 'RepositoryError':
172 173 raise exceptions.VcsException()(*result['exception_args'])
173 174 elif exception_class:
174 175 raise Exception(
175 176 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
176 177 )
177 178
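For reference, an illustrative failing payload as consumed by _handle_exception(); the message text is made up. A result shaped like this is re-raised as exceptions.RepositoryLockedException()(*exception_args).

    result = {
        'status': 1,
        'output': '',
        'exception': 'HTTPLockedRC',            # class name as a plain string
        'exception_args': ['repository locked'],
        'exception_traceback': '',
    }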
178 179
179 180 def _get_hooks_client(extras):
180 181 hooks_uri = extras.get('hooks_uri')
181 182 task_queue = extras.get('task_queue')
182 183 task_backend = extras.get('task_backend')
183 184 is_shadow_repo = extras.get('is_shadow_repo')
184 185
185 186 if hooks_uri:
186 187 return HooksHttpClient(hooks_uri)
187 188 elif task_queue and task_backend:
188 189 return HooksCeleryClient(task_queue, task_backend)
189 190 elif is_shadow_repo:
190 191 return HooksShadowRepoClient()
191 192 else:
192 193 raise Exception("Hooks client not found!")
193 194
194 195
195 196 def _call_hook(hook_name, extras, writer):
196 197 hooks_client = _get_hooks_client(extras)
197 198 log.debug('Hooks, using client:%s', hooks_client)
198 199 result = hooks_client(hook_name, extras)
199 200 log.debug('Hooks got result: %s', result)
200 201 _handle_exception(result)
201 202 writer.write(result['output'])
202 203
203 204 return result['status']
204 205
205 206
206 207 def _extras_from_ui(ui):
207 208 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
208 209 if not hook_data:
209 210 # maybe it's inside environ ?
210 211 env_hook_data = os.environ.get('RC_SCM_DATA')
211 212 if env_hook_data:
212 213 hook_data = env_hook_data
213 214
214 215 extras = {}
215 216 if hook_data:
216 217 extras = json.loads(hook_data)
217 218 return extras
218 219
219 220
220 221 def _rev_range_hash(repo, node, check_heads=False):
221 222 from vcsserver.hgcompat import get_ctx
222 223
223 224 commits = []
224 225 revs = []
225 226 start = get_ctx(repo, node).rev()
226 227 end = len(repo)
227 228 for rev in range(start, end):
228 229 revs.append(rev)
229 230 ctx = get_ctx(repo, rev)
230 231 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
231 232 branch = safe_str(ctx.branch())
232 233 commits.append((commit_id, branch))
233 234
234 235 parent_heads = []
235 236 if check_heads:
236 237 parent_heads = _check_heads(repo, start, end, revs)
237 238 return commits, parent_heads
238 239
239 240
240 241 def _check_heads(repo, start, end, commits):
241 242 from vcsserver.hgcompat import get_ctx
242 243 changelog = repo.changelog
243 244 parents = set()
244 245
245 246 for new_rev in commits:
246 247 for p in changelog.parentrevs(new_rev):
247 248 if p == mercurial.node.nullrev:
248 249 continue
249 250 if p < start:
250 251 parents.add(p)
251 252
252 253 for p in parents:
253 254 branch = get_ctx(repo, p).branch()
254 255 # The heads descending from that parent, on the same branch
255 256 parent_heads = {p}
256 257 reachable = {p}
257 258 for x in range(p + 1, end):
258 259 if get_ctx(repo, x).branch() != branch:
259 260 continue
260 261 for pp in changelog.parentrevs(x):
261 262 if pp in reachable:
262 263 reachable.add(x)
263 264 parent_heads.discard(pp)
264 265 parent_heads.add(x)
265 266 # More than one head? Suggest merging
266 267 if len(parent_heads) > 1:
267 268 return list(parent_heads)
268 269
269 270 return []
270 271
271 272
272 273 def _get_git_env():
273 274 env = {}
274 275 for k, v in os.environ.items():
275 276 if k.startswith('GIT'):
276 277 env[k] = v
277 278
278 279 # serialized version
279 280 return [(k, v) for k, v in env.items()]
280 281
281 282
282 283 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
283 284 env = {}
284 285 for k, v in os.environ.items():
285 286 if k.startswith('HG'):
286 287 env[k] = v
287 288
288 289 env['HG_NODE'] = old_rev
289 290 env['HG_NODE_LAST'] = new_rev
290 291 env['HG_TXNID'] = txnid
291 292 env['HG_PENDING'] = repo_path
292 293
293 294 return [(k, v) for k, v in env.items()]
294 295
295 296
297 def _get_ini_settings(ini_file):
298 from vcsserver.http_main import sanitize_settings_and_apply_defaults
299 from vcsserver.lib.config_utils import get_app_config_lightweight, configure_and_store_settings
300
301 global_config = {'__file__': ini_file}
302 ini_settings = get_app_config_lightweight(ini_file)
303 sanitize_settings_and_apply_defaults(global_config, ini_settings)
304 configure_and_store_settings(global_config, ini_settings)
305
306 return ini_settings
307
308
296 309 def _fix_hooks_executables(ini_path=''):
297 310 """
 298 311 This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns,
 299 312 especially for subversion, where hooks strip the entire env and calling just the 'svn'
 300 313 command will most likely fail because svn is not on PATH
301 314 """
302 from vcsserver.http_main import sanitize_settings_and_apply_defaults
303 from vcsserver.lib.config_utils import get_app_config_lightweight
304
315 # set defaults, in case we can't read from ini_file
305 316 core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
306 317 if ini_path:
307
308 ini_settings = get_app_config_lightweight(ini_path)
309 ini_settings = sanitize_settings_and_apply_defaults({'__file__': ini_path}, ini_settings)
318 ini_settings = _get_ini_settings(ini_path)
310 319 core_binary_dir = ini_settings['core.binary_dir']
311 320
312 321 settings.BINARY_DIR = core_binary_dir
313 322
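A hedged sketch of the usual call site: the generated hooks export RC_INI_FILE (see the templates above), which lets the binaries be resolved even with a stripped environment.

    import os

    ini_file = os.environ.get('RC_INI_FILE', '')
    _fix_hooks_executables(ini_file)              # falls back to defaults if empty
    svnlook_bin = settings.SVNLOOK_EXECUTABLE()   # resolvable via core.binary_dir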
314 323
315 324 def repo_size(ui, repo, **kwargs):
316 325 extras = _extras_from_ui(ui)
317 326 return _call_hook('repo_size', extras, HgMessageWriter(ui))
318 327
319 328
320 329 def pre_pull(ui, repo, **kwargs):
321 330 extras = _extras_from_ui(ui)
322 331 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
323 332
324 333
325 334 def pre_pull_ssh(ui, repo, **kwargs):
326 335 extras = _extras_from_ui(ui)
327 336 if extras and extras.get('SSH'):
328 337 return pre_pull(ui, repo, **kwargs)
329 338 return 0
330 339
331 340
332 341 def post_pull(ui, repo, **kwargs):
333 342 extras = _extras_from_ui(ui)
334 343 return _call_hook('post_pull', extras, HgMessageWriter(ui))
335 344
336 345
337 346 def post_pull_ssh(ui, repo, **kwargs):
338 347 extras = _extras_from_ui(ui)
339 348 if extras and extras.get('SSH'):
340 349 return post_pull(ui, repo, **kwargs)
341 350 return 0
342 351
343 352
344 353 def pre_push(ui, repo, node=None, **kwargs):
345 354 """
346 355 Mercurial pre_push hook
347 356 """
348 357 extras = _extras_from_ui(ui)
349 358 detect_force_push = extras.get('detect_force_push')
350 359
351 360 rev_data = []
352 361 hook_type: str = safe_str(kwargs.get('hooktype'))
353 362
354 363 if node and hook_type == 'pretxnchangegroup':
355 364 branches = collections.defaultdict(list)
356 365 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
357 366 for commit_id, branch in commits:
358 367 branches[branch].append(commit_id)
359 368
360 369 for branch, commits in branches.items():
361 370 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
362 371 rev_data.append({
363 372 'total_commits': len(commits),
364 373 'old_rev': old_rev,
365 374 'new_rev': commits[-1],
366 375 'ref': '',
367 376 'type': 'branch',
368 377 'name': branch,
369 378 })
370 379
371 380 for push_ref in rev_data:
372 381 push_ref['multiple_heads'] = _heads
373 382
374 383 repo_path = os.path.join(
375 384 extras.get('repo_store', ''), extras.get('repository', ''))
376 385 push_ref['hg_env'] = _get_hg_env(
377 386 old_rev=push_ref['old_rev'],
378 387 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
379 388 repo_path=repo_path)
380 389
381 390 extras['hook_type'] = hook_type or 'pre_push'
382 391 extras['commit_ids'] = rev_data
383 392
384 393 return _call_hook('pre_push', extras, HgMessageWriter(ui))
385 394
386 395
387 396 def pre_push_ssh(ui, repo, node=None, **kwargs):
388 397 extras = _extras_from_ui(ui)
389 398 if extras.get('SSH'):
390 399 return pre_push(ui, repo, node, **kwargs)
391 400
392 401 return 0
393 402
394 403
395 404 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
396 405 """
397 406 Mercurial pre_push hook for SSH
398 407 """
399 408 extras = _extras_from_ui(ui)
400 409 if extras.get('SSH'):
401 410 permission = extras['SSH_PERMISSIONS']
402 411
403 412 if 'repository.write' == permission or 'repository.admin' == permission:
404 413 return 0
405 414
406 415 # non-zero ret code
407 416 return 1
408 417
409 418 return 0
410 419
411 420
412 421 def post_push(ui, repo, node, **kwargs):
413 422 """
414 423 Mercurial post_push hook
415 424 """
416 425 extras = _extras_from_ui(ui)
417 426
418 427 commit_ids = []
419 428 branches = []
420 429 bookmarks = []
421 430 tags = []
422 431 hook_type: str = safe_str(kwargs.get('hooktype'))
423 432
424 433 commits, _heads = _rev_range_hash(repo, node)
425 434 for commit_id, branch in commits:
426 435 commit_ids.append(commit_id)
427 436 if branch not in branches:
428 437 branches.append(branch)
429 438
430 439 if hasattr(ui, '_rc_pushkey_bookmarks'):
431 440 bookmarks = ui._rc_pushkey_bookmarks
432 441
433 442 extras['hook_type'] = hook_type or 'post_push'
434 443 extras['commit_ids'] = commit_ids
435 444
436 445 extras['new_refs'] = {
437 446 'branches': branches,
438 447 'bookmarks': bookmarks,
439 448 'tags': tags
440 449 }
441 450
442 451 return _call_hook('post_push', extras, HgMessageWriter(ui))
443 452
444 453
445 454 def post_push_ssh(ui, repo, node, **kwargs):
446 455 """
447 456 Mercurial post_push hook for SSH
448 457 """
449 458 if _extras_from_ui(ui).get('SSH'):
450 459 return post_push(ui, repo, node, **kwargs)
451 460 return 0
452 461
453 462
454 463 def key_push(ui, repo, **kwargs):
455 464 from vcsserver.hgcompat import get_ctx
456 465
457 466 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
458 467 # store new bookmarks in our UI object propagated later to post_push
459 468 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
460 469 return
461 470
462 471
463 472 # backward compat
464 473 log_pull_action = post_pull
465 474
466 475 # backward compat
467 476 log_push_action = post_push
468 477
469 478
470 479 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
471 480 """
472 481 Old hook name: keep here for backward compatibility.
473 482
474 483 This is only required when the installed git hooks are not upgraded.
475 484 """
476 485 pass
477 486
478 487
479 488 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
480 489 """
481 490 Old hook name: keep here for backward compatibility.
482 491
483 492 This is only required when the installed git hooks are not upgraded.
484 493 """
485 494 pass
486 495
487 496
488 497 @dataclasses.dataclass
489 498 class HookResponse:
490 499 status: int
491 500 output: str
492 501
493 502
494 503 def git_pre_pull(extras) -> HookResponse:
495 504 """
496 505 Pre pull hook.
497 506
498 507 :param extras: dictionary containing the keys defined in simplevcs
499 508 :type extras: dict
500 509
 501 510 :return: HookResponse with the hook status code (0 for success) and its output.
 502 511 :rtype: HookResponse
503 512 """
504 513
505 514 if 'pull' not in extras['hooks']:
506 515 return HookResponse(0, '')
507 516
508 517 stdout = io.StringIO()
509 518 try:
510 519 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
511 520
512 521 except Exception as error:
513 522 log.exception('Failed to call pre_pull hook')
514 523 status_code = 128
515 524 stdout.write(f'ERROR: {error}\n')
516 525
517 526 return HookResponse(status_code, stdout.getvalue())
518 527
519 528
520 529 def git_post_pull(extras) -> HookResponse:
521 530 """
522 531 Post pull hook.
523 532
524 533 :param extras: dictionary containing the keys defined in simplevcs
525 534 :type extras: dict
526 535
 527 536 :return: HookResponse with the hook status code (0 for success) and its output.
 528 537 :rtype: HookResponse
529 538 """
530 539 if 'pull' not in extras['hooks']:
531 540 return HookResponse(0, '')
532 541
533 542 stdout = io.StringIO()
534 543 try:
535 544 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
536 545 except Exception as error:
537 546 status = 128
538 547 stdout.write(f'ERROR: {error}\n')
539 548
540 549 return HookResponse(status, stdout.getvalue())
541 550
542 551
543 552 def _parse_git_ref_lines(revision_lines):
544 553 rev_data = []
545 554 for revision_line in revision_lines or []:
546 555 old_rev, new_rev, ref = revision_line.strip().split(' ')
547 556 ref_data = ref.split('/', 2)
548 557 if ref_data[1] in ('tags', 'heads'):
549 558 rev_data.append({
550 559 # NOTE(marcink):
 551 560 # we're unable to tell total_commits for git at this point
 552 561 # but we set the variable for consistency with the other backends
553 562 'total_commits': -1,
554 563 'old_rev': old_rev,
555 564 'new_rev': new_rev,
556 565 'ref': ref,
557 566 'type': ref_data[1],
558 567 'name': ref_data[2],
559 568 })
560 569 return rev_data
561 570
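An illustrative input/output pair for the parser above; git feeds receive hooks lines in the '<old-sha> <new-sha> <ref>' form (the SHAs here are made up):

    lines = ['aaa111 bbb222 refs/heads/main']
    parsed = _parse_git_ref_lines(lines)
    # parsed == [{'total_commits': -1, 'old_rev': 'aaa111', 'new_rev': 'bbb222',
    #             'ref': 'refs/heads/main', 'type': 'heads', 'name': 'main'}]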
562 571
563 572 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
564 573 """
565 574 Pre push hook.
566 575
567 576 :return: status code of the hook. 0 for success.
568 577 """
569 578 extras = json.loads(env['RC_SCM_DATA'])
570 579 rev_data = _parse_git_ref_lines(revision_lines)
571 580 if 'push' not in extras['hooks']:
572 581 return 0
573 _fix_hooks_executables()
582 _fix_hooks_executables(env.get('RC_INI_FILE'))
574 583
575 584 empty_commit_id = '0' * 40
576 585
577 586 detect_force_push = extras.get('detect_force_push')
578 587
579 588 for push_ref in rev_data:
580 589 # store our git-env which holds the temp store
581 590 push_ref['git_env'] = _get_git_env()
582 591 push_ref['pruned_sha'] = ''
583 592 if not detect_force_push:
584 593 # don't check for forced-push when we don't need to
585 594 continue
586 595
587 596 type_ = push_ref['type']
588 597 new_branch = push_ref['old_rev'] == empty_commit_id
589 598 delete_branch = push_ref['new_rev'] == empty_commit_id
590 599 if type_ == 'heads' and not (new_branch or delete_branch):
591 600 old_rev = push_ref['old_rev']
592 601 new_rev = push_ref['new_rev']
593 602 cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
594 603 stdout, stderr = subprocessio.run_command(
595 604 cmd, env=os.environ.copy())
 596 605 # non-empty output means there are unreachable objects, i.e. a forced push was used
597 606 if stdout:
598 607 push_ref['pruned_sha'] = stdout.splitlines()
599 608
600 609 extras['hook_type'] = 'pre_receive'
601 610 extras['commit_ids'] = rev_data
602 611
603 612 stdout = sys.stdout
604 613 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
605 614
606 615 return status_code
607 616
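A standalone sketch of the forced-push probe used above: commits reachable from the old tip but not from the new tip can only exist when history was rewritten. The SHAs are placeholders.

    old_rev, new_rev = 'OLD_SHA', 'NEW_SHA'
    cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
    stdout, stderr = subprocessio.run_command(cmd, env=os.environ.copy())
    forced_push = bool(stdout)   # any output -> unreachable commits -> force push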
608 617
609 618 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
610 619 """
611 620 Post push hook.
612 621
613 622 :return: status code of the hook. 0 for success.
614 623 """
615 624 extras = json.loads(env['RC_SCM_DATA'])
616 625 if 'push' not in extras['hooks']:
617 626 return 0
618 627
619 _fix_hooks_executables()
628 _fix_hooks_executables(env.get('RC_INI_FILE'))
620 629
621 630 rev_data = _parse_git_ref_lines(revision_lines)
622 631
623 632 git_revs = []
624 633
625 634 # N.B.(skreft): it is ok to just call git, as git before calling a
 626 635 # subcommand sets the PATH environment variable so that it points to the
627 636 # correct version of the git executable.
628 637 empty_commit_id = '0' * 40
629 638 branches = []
630 639 tags = []
631 640 for push_ref in rev_data:
632 641 type_ = push_ref['type']
633 642
634 643 if type_ == 'heads':
635 644 # starting new branch case
636 645 if push_ref['old_rev'] == empty_commit_id:
637 646 push_ref_name = push_ref['name']
638 647
639 648 if push_ref_name not in branches:
640 649 branches.append(push_ref_name)
641 650
642 651 need_head_set = ''
643 652 with Repository(os.getcwd()) as repo:
644 653 try:
645 654 repo.head
646 655 except pygit2.GitError:
647 656 need_head_set = f'refs/heads/{push_ref_name}'
648 657
649 658 if need_head_set:
650 659 repo.set_head(need_head_set)
651 660 print(f"Setting default branch to {push_ref_name}")
652 661
653 662 cmd = [settings.GIT_EXECUTABLE(), 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
654 663 stdout, stderr = subprocessio.run_command(
655 664 cmd, env=os.environ.copy())
656 665 heads = safe_str(stdout)
657 666 heads = heads.replace(push_ref['ref'], '')
658 667 heads = ' '.join(head for head
659 668 in heads.splitlines() if head) or '.'
660 669 cmd = [settings.GIT_EXECUTABLE(), 'log', '--reverse',
661 670 '--pretty=format:%H', '--', push_ref['new_rev'],
662 671 '--not', heads]
663 672 stdout, stderr = subprocessio.run_command(
664 673 cmd, env=os.environ.copy())
665 674 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
666 675
667 676 # delete branch case
668 677 elif push_ref['new_rev'] == empty_commit_id:
669 678 git_revs.append(f'delete_branch=>{push_ref["name"]}')
670 679 else:
671 680 if push_ref['name'] not in branches:
672 681 branches.append(push_ref['name'])
673 682
674 683 cmd = [settings.GIT_EXECUTABLE(), 'log',
675 684 f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
676 685 '--reverse', '--pretty=format:%H']
677 686 stdout, stderr = subprocessio.run_command(
678 687 cmd, env=os.environ.copy())
679 688 # we get bytes from stdout, we need str to be consistent
680 689 log_revs = list(map(ascii_str, stdout.splitlines()))
681 690 git_revs.extend(log_revs)
682 691
683 692 # Pure pygit2 impl. but still 2-3x slower :/
684 693 # results = []
685 694 #
686 695 # with Repository(os.getcwd()) as repo:
687 696 # repo_new_rev = repo[push_ref['new_rev']]
688 697 # repo_old_rev = repo[push_ref['old_rev']]
689 698 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
690 699 #
691 700 # for commit in walker:
692 701 # if commit.id == repo_old_rev.id:
693 702 # break
694 703 # results.append(commit.id.hex)
695 704 # # reverse the order, can't use GIT_SORT_REVERSE
696 705 # log_revs = results[::-1]
697 706
698 707 elif type_ == 'tags':
699 708 if push_ref['name'] not in tags:
700 709 tags.append(push_ref['name'])
701 710 git_revs.append(f'tag=>{push_ref["name"]}')
702 711
703 712 extras['hook_type'] = 'post_receive'
704 713 extras['commit_ids'] = git_revs
705 714 extras['new_refs'] = {
706 715 'branches': branches,
707 716 'bookmarks': [],
708 717 'tags': tags,
709 718 }
710 719
711 720 stdout = sys.stdout
712 721
713 722 if 'repo_size' in extras['hooks']:
714 723 try:
715 724 _call_hook('repo_size', extras, GitMessageWriter(stdout))
716 725 except Exception:
717 726 pass
718 727
719 728 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
720 729 return status_code
721 730
722 731
723 def _get_extras_from_txn_id(path, txn_id):
724 _fix_hooks_executables()
725
726 extras = {}
727 try:
728 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
729 '-t', txn_id,
730 '--revprop', path, 'rc-scm-extras']
731 stdout, stderr = subprocessio.run_command(
732 cmd, env=os.environ.copy())
733 extras = json.loads(base64.urlsafe_b64decode(stdout))
734 except Exception:
735 log.exception('Failed to extract extras info from txn_id')
736
737 return extras
738
739
740 def _get_extras_from_commit_id(commit_id, path):
741 _fix_hooks_executables()
742
743 extras = {}
744 try:
745 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
746 '-r', commit_id,
747 '--revprop', path, 'rc-scm-extras']
748 stdout, stderr = subprocessio.run_command(
749 cmd, env=os.environ.copy())
750 extras = json.loads(base64.urlsafe_b64decode(stdout))
751 except Exception:
752 log.exception('Failed to extract extras info from commit_id')
753
732 def get_extras_from_txn_id(repo_path, txn_id):
733 extras = get_txn_id_from_store(repo_path, txn_id)
754 734 return extras
755 735
756 736
757 737 def svn_pre_commit(repo_path, commit_data, env):
758 738
759 739 path, txn_id = commit_data
760 740 branches = []
761 741 tags = []
762 742
763 743 if env.get('RC_SCM_DATA'):
764 744 extras = json.loads(env['RC_SCM_DATA'])
765 745 else:
746 ini_path = env.get('RC_INI_FILE')
747 if ini_path:
748 _get_ini_settings(ini_path)
766 749 # fallback method to read from TXN-ID stored data
767 extras = _get_extras_from_txn_id(path, txn_id)
768 if not extras:
769 return 0
750 extras = get_extras_from_txn_id(path, txn_id)
751
752 if not extras:
 753 raise ValueError('SVN-PRE-COMMIT: Failed to extract the context data (extras) needed for hook execution')
754
755 if extras.get('rc_internal_commit'):
756 # special marker for internal commit, we don't call hooks client
757 return 0
770 758
771 759 extras['hook_type'] = 'pre_commit'
772 760 extras['commit_ids'] = [txn_id]
773 761 extras['txn_id'] = txn_id
774 762 extras['new_refs'] = {
775 763 'total_commits': 1,
776 764 'branches': branches,
777 765 'bookmarks': [],
778 766 'tags': tags,
779 767 }
780 768
781 769 return _call_hook('pre_push', extras, SvnMessageWriter())
782 770
783 771
784 772 def svn_post_commit(repo_path, commit_data, env):
785 773 """
786 774 commit_data is path, rev, txn_id
787 775 """
788 776
789 777 if len(commit_data) == 3:
790 778 path, commit_id, txn_id = commit_data
791 779 elif len(commit_data) == 2:
792 780 log.error('Failed to extract txn_id from commit_data using legacy method. '
793 781 'Some functionality might be limited')
794 782 path, commit_id = commit_data
795 783 txn_id = None
796 784 else:
797 785 return 0
798 786
799 787 branches = []
800 788 tags = []
801 789
802 790 if env.get('RC_SCM_DATA'):
803 791 extras = json.loads(env['RC_SCM_DATA'])
804 792 else:
793 ini_path = env.get('RC_INI_FILE')
794 if ini_path:
795 _get_ini_settings(ini_path)
805 796 # fallback method to read from TXN-ID stored data
806 extras = _get_extras_from_commit_id(commit_id, path)
807 if not extras:
808 return 0
797 extras = get_extras_from_txn_id(path, txn_id)
798
799 if not extras and txn_id:
 800 raise ValueError('SVN-POST-COMMIT: Failed to extract the context data (extras) needed for hook execution')
801
802 if extras.get('rc_internal_commit'):
803 # special marker for internal commit, we don't call hooks client
804 return 0
809 805
810 806 extras['hook_type'] = 'post_commit'
811 807 extras['commit_ids'] = [commit_id]
812 808 extras['txn_id'] = txn_id
813 809 extras['new_refs'] = {
814 810 'branches': branches,
815 811 'bookmarks': [],
816 812 'tags': tags,
817 813 'total_commits': 1,
818 814 }
819 815
820 816 if 'repo_size' in extras['hooks']:
821 817 try:
822 818 _call_hook('repo_size', extras, SvnMessageWriter())
823 819 except Exception:
824 820 pass
825 821
826 822 return _call_hook('post_push', extras, SvnMessageWriter())
@@ -1,774 +1,763 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import platform
21 21 import sys
22 22 import locale
23 23 import logging
24 24 import uuid
25 25 import time
26 26 import wsgiref.util
27 27 import tempfile
28 28 import psutil
29 29
30 30 from itertools import chain
31 31
32 32 import msgpack
33 33 import configparser
34 34
35 35 from pyramid.config import Configurator
36 36 from pyramid.wsgi import wsgiapp
37 37 from pyramid.response import Response
38 38
39 39 from vcsserver.base import BytesEnvelope, BinaryEnvelope
40 from vcsserver.lib.rc_json import json
40
41 41 from vcsserver.config.settings_maker import SettingsMaker
42 from vcsserver.str_utils import safe_int
43 from vcsserver.lib.statsd_client import StatsdClient
42
44 43 from vcsserver.tweens.request_wrapper import get_headers_call_context
45 44
46 import vcsserver
47 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
45 from vcsserver import remote_wsgi, scm_app, hgpatches
46 from vcsserver.server import VcsServer
48 47 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
49 48 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
50 49 from vcsserver.echo_stub.echo_app import EchoApp
51 50 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
52 51 from vcsserver.lib.exc_tracking import store_exception, format_exc
53 from vcsserver.server import VcsServer
52 from vcsserver.lib.str_utils import safe_int
53 from vcsserver.lib.statsd_client import StatsdClient
54 from vcsserver.lib.ext_json import json
55 from vcsserver.lib.config_utils import configure_and_store_settings
56
54 57
55 58 strict_vcs = True
56 59
57 60 git_import_err = None
58 61 try:
59 62 from vcsserver.remote.git_remote import GitFactory, GitRemote
60 63 except ImportError as e:
61 64 GitFactory = None
62 65 GitRemote = None
63 66 git_import_err = e
64 67 if strict_vcs:
65 68 raise
66 69
67 70
68 71 hg_import_err = None
69 72 try:
70 73 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
71 74 except ImportError as e:
72 75 MercurialFactory = None
73 76 HgRemote = None
74 77 hg_import_err = e
75 78 if strict_vcs:
76 79 raise
77 80
78 81
79 82 svn_import_err = None
80 83 try:
81 84 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
82 85 except ImportError as e:
83 86 SubversionFactory = None
84 87 SvnRemote = None
85 88 svn_import_err = e
86 89 if strict_vcs:
87 90 raise
88 91
89 92 log = logging.getLogger(__name__)
90 93
91 94 # due to Mercurial/glibc2.27 problems we need to detect if locale settings are
 92 95 # causing problems and "fix" them in case they do, falling back to LC_ALL=C
93 96
94 97 try:
95 98 locale.setlocale(locale.LC_ALL, '')
96 99 except locale.Error as e:
97 log.error(
98 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
100 log.error('LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
99 101 os.environ['LC_ALL'] = 'C'
100 102
101 103
102 104 def _is_request_chunked(environ):
103 105 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
104 106 return stream
105 107
106 108
107 109 def log_max_fd():
108 110 try:
109 111 maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
110 112 log.info('Max file descriptors value: %s', maxfd)
111 113 except Exception:
112 114 pass
113 115
114 116
115 117 class VCS:
116 118 def __init__(self, locale_conf=None, cache_config=None):
117 119 self.locale = locale_conf
118 120 self.cache_config = cache_config
119 121 self._configure_locale()
120 122
121 123 log_max_fd()
122 124
123 125 if GitFactory and GitRemote:
124 126 git_factory = GitFactory()
125 127 self._git_remote = GitRemote(git_factory)
126 128 else:
127 129 log.error("Git client import failed: %s", git_import_err)
128 130
129 131 if MercurialFactory and HgRemote:
130 132 hg_factory = MercurialFactory()
131 133 self._hg_remote = HgRemote(hg_factory)
132 134 else:
133 135 log.error("Mercurial client import failed: %s", hg_import_err)
134 136
135 137 if SubversionFactory and SvnRemote:
136 138 svn_factory = SubversionFactory()
137 139
138 140 # hg factory is used for svn url validation
139 141 hg_factory = MercurialFactory()
140 142 self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
141 143 else:
142 144 log.error("Subversion client import failed: %s", svn_import_err)
143 145
144 146 self._vcsserver = VcsServer()
145 147
146 148 def _configure_locale(self):
147 149 if self.locale:
 148 150 log.info('Setting locale `LC_ALL` to %s', self.locale)
149 151 else:
150 152 log.info('Configuring locale subsystem based on environment variables')
151 153 try:
152 154 # If self.locale is the empty string, then the locale
153 155 # module will use the environment variables. See the
154 156 # documentation of the package `locale`.
155 157 locale.setlocale(locale.LC_ALL, self.locale)
156 158
157 159 language_code, encoding = locale.getlocale()
158 160 log.info(
159 161 'Locale set to language code "%s" with encoding "%s".',
160 162 language_code, encoding)
161 163 except locale.Error:
162 164 log.exception('Cannot set locale, not configuring the locale system')
163 165
164 166
165 167 class WsgiProxy:
166 168 def __init__(self, wsgi):
167 169 self.wsgi = wsgi
168 170
169 171 def __call__(self, environ, start_response):
170 172 input_data = environ['wsgi.input'].read()
171 173 input_data = msgpack.unpackb(input_data)
172 174
173 175 error = None
174 176 try:
175 177 data, status, headers = self.wsgi.handle(
176 178 input_data['environment'], input_data['input_data'],
177 179 *input_data['args'], **input_data['kwargs'])
178 180 except Exception as e:
179 181 data, status, headers = [], None, None
180 182 error = {
181 183 'message': str(e),
182 184 '_vcs_kind': getattr(e, '_vcs_kind', None)
183 185 }
184 186
185 187 start_response(200, {})
186 188 return self._iterator(error, status, headers, data)
187 189
188 190 def _iterator(self, error, status, headers, data):
189 191 initial_data = [
190 192 error,
191 193 status,
192 194 headers,
193 195 ]
194 196
195 197 for d in chain(initial_data, data):
196 198 yield msgpack.packb(d)
197 199
198 200
199 201 def not_found(request):
200 202 return {'status': '404 NOT FOUND'}
201 203
202 204
203 205 class VCSViewPredicate:
204 206 def __init__(self, val, config):
205 207 self.remotes = val
206 208
207 209 def text(self):
208 210 return f'vcs view method = {list(self.remotes.keys())}'
209 211
210 212 phash = text
211 213
212 214 def __call__(self, context, request):
213 215 """
 214 216 View predicate that returns true if the given backend is supported by
 215 217 the defined remotes.
216 218 """
217 219 backend = request.matchdict.get('backend')
218 220 return backend in self.remotes
219 221
220 222
221 223 class HTTPApplication:
222 224 ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
223 225
224 226 remote_wsgi = remote_wsgi
225 227 _use_echo_app = False
226 228
227 229 def __init__(self, settings=None, global_config=None):
228 230
229 231 self.config = Configurator(settings=settings)
230 232 # Init our statsd at very start
231 233 self.config.registry.statsd = StatsdClient.statsd
232 234 self.config.registry.vcs_call_context = {}
233 235
234 236 self.global_config = global_config
235 237 self.config.include('vcsserver.lib.rc_cache')
236 self.config.include('vcsserver.lib.rc_cache.archive_cache')
238 self.config.include('vcsserver.lib.archive_cache')
237 239
238 240 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
239 241 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
240 242 self._remotes = {
241 243 'hg': vcs._hg_remote,
242 244 'git': vcs._git_remote,
243 245 'svn': vcs._svn_remote,
244 246 'server': vcs._vcsserver,
245 247 }
246 248 if settings.get('dev.use_echo_app', 'false').lower() == 'true':
247 249 self._use_echo_app = True
248 250 log.warning("Using EchoApp for VCS operations.")
249 251 self.remote_wsgi = remote_wsgi_stub
250 252
251 self._configure_settings(global_config, settings)
253 configure_and_store_settings(global_config, settings)
252 254
253 255 self._configure()
254 256
255 def _configure_settings(self, global_config, app_settings):
256 """
257 Configure the settings module.
258 """
259 settings_merged = global_config.copy()
260 settings_merged.update(app_settings)
261
262 binary_dir = app_settings['core.binary_dir']
263
264 settings.BINARY_DIR = binary_dir
265
266 # Store the settings to make them available to other modules.
267 vcsserver.PYRAMID_SETTINGS = settings_merged
268 vcsserver.CONFIG = settings_merged
269
270 257 def _configure(self):
271 258 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
272 259
273 260 self.config.add_route('service', '/_service')
274 261 self.config.add_route('status', '/status')
275 262 self.config.add_route('hg_proxy', '/proxy/hg')
276 263 self.config.add_route('git_proxy', '/proxy/git')
277 264
278 265 # rpc methods
279 266 self.config.add_route('vcs', '/{backend}')
280 267
281 268 # streaming rpc remote methods
282 269 self.config.add_route('vcs_stream', '/{backend}/stream')
283 270
284 271 # vcs operations clone/push as streaming
285 272 self.config.add_route('stream_git', '/stream/git/*repo_name')
286 273 self.config.add_route('stream_hg', '/stream/hg/*repo_name')
287 274
288 275 self.config.add_view(self.status_view, route_name='status', renderer='json')
289 276 self.config.add_view(self.service_view, route_name='service', renderer='msgpack')
290 277
291 278 self.config.add_view(self.hg_proxy(), route_name='hg_proxy')
292 279 self.config.add_view(self.git_proxy(), route_name='git_proxy')
293 280 self.config.add_view(self.vcs_view, route_name='vcs', renderer='msgpack',
294 281 vcs_view=self._remotes)
295 282 self.config.add_view(self.vcs_stream_view, route_name='vcs_stream',
296 283 vcs_view=self._remotes)
297 284
298 285 self.config.add_view(self.hg_stream(), route_name='stream_hg')
299 286 self.config.add_view(self.git_stream(), route_name='stream_git')
300 287
301 288 self.config.add_view_predicate('vcs_view', VCSViewPredicate)
302 289
303 290 self.config.add_notfound_view(not_found, renderer='json')
304 291
305 292 self.config.add_view(self.handle_vcs_exception, context=Exception)
306 293
307 294 self.config.add_tween(
308 295 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
309 296 )
310 297 self.config.add_request_method(
311 298 'vcsserver.lib.request_counter.get_request_counter',
312 299 'request_count')
313 300
314 301 def wsgi_app(self):
315 302 return self.config.make_wsgi_app()
316 303
317 304 def _vcs_view_params(self, request):
318 305 remote = self._remotes[request.matchdict['backend']]
319 306 payload = msgpack.unpackb(request.body, use_list=True)
320 307
321 308 method = payload.get('method')
322 309 params = payload['params']
323 310 wire = params.get('wire')
324 311 args = params.get('args')
325 312 kwargs = params.get('kwargs')
326 313 context_uid = None
327 314
328 315 request.registry.vcs_call_context = {
329 316 'method': method,
330 317 'repo_name': payload.get('_repo_name'),
331 318 }
332 319
333 320 if wire:
334 321 try:
335 322 wire['context'] = context_uid = uuid.UUID(wire['context'])
336 323 except KeyError:
337 324 pass
338 325 args.insert(0, wire)
339 326 repo_state_uid = wire.get('repo_state_uid') if wire else None
340 327
 341 328 # NOTE(marcink): trading complexity for a slight performance gain
 342 329 if log.isEnabledFor(logging.DEBUG):
 343 330 # also we SKIP printing out any of those methods' args since they may be excessive
344 331 just_args_methods = {
345 332 'commitctx': ('content', 'removed', 'updated'),
346 333 'commit': ('content', 'removed', 'updated')
347 334 }
348 335 if method in just_args_methods:
349 336 skip_args = just_args_methods[method]
350 337 call_args = ''
351 338 call_kwargs = {}
352 339 for k in kwargs:
353 340 if k in skip_args:
354 341 # replace our skip key with dummy
355 342 call_kwargs[k] = f'RemovedParam({k})'
356 343 else:
357 344 call_kwargs[k] = kwargs[k]
358 345 else:
359 346 call_args = args[1:]
360 347 call_kwargs = kwargs
361 348
362 349 log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
363 350 method, call_args, call_kwargs, context_uid, repo_state_uid)
364 351
365 352 statsd = request.registry.statsd
366 353 if statsd:
367 354 statsd.incr(
368 355 'vcsserver_method_total', tags=[
369 356 f"method:{method}",
370 357 ])
371 358 return payload, remote, method, args, kwargs
372 359
373 360 def vcs_view(self, request):
374 361
375 362 payload, remote, method, args, kwargs = self._vcs_view_params(request)
376 363 payload_id = payload.get('id')
377 364
378 365 try:
379 366 resp = getattr(remote, method)(*args, **kwargs)
380 367 except Exception as e:
381 368 exc_info = list(sys.exc_info())
382 369 exc_type, exc_value, exc_traceback = exc_info
383 370
384 371 org_exc = getattr(e, '_org_exc', None)
385 372 org_exc_name = None
386 373 org_exc_tb = ''
387 374 if org_exc:
388 375 org_exc_name = org_exc.__class__.__name__
389 376 org_exc_tb = getattr(e, '_org_exc_tb', '')
390 377 # replace our "faked" exception with our org
391 378 exc_info[0] = org_exc.__class__
392 379 exc_info[1] = org_exc
393 380
394 381 should_store_exc = True
395 382 if org_exc:
396 383 def get_exc_fqn(_exc_obj):
397 384 module_name = getattr(org_exc.__class__, '__module__', 'UNKNOWN')
398 385 return module_name + '.' + org_exc_name
399 386
400 387 exc_fqn = get_exc_fqn(org_exc)
401 388
402 389 if exc_fqn in ['mercurial.error.RepoLookupError',
403 390 'vcsserver.exceptions.RefNotFoundException']:
404 391 should_store_exc = False
405 392
406 393 if should_store_exc:
407 394 store_exception(id(exc_info), exc_info, request_path=request.path)
408 395
409 396 tb_info = format_exc(exc_info)
410 397
411 398 type_ = e.__class__.__name__
412 399 if type_ not in self.ALLOWED_EXCEPTIONS:
413 400 type_ = None
414 401
415 402 resp = {
416 403 'id': payload_id,
417 404 'error': {
418 405 'message': str(e),
419 406 'traceback': tb_info,
420 407 'org_exc': org_exc_name,
421 408 'org_exc_tb': org_exc_tb,
422 409 'type': type_
423 410 }
424 411 }
425 412
426 413 try:
427 414 resp['error']['_vcs_kind'] = getattr(e, '_vcs_kind', None)
428 415 except AttributeError:
429 416 pass
430 417 else:
431 418 resp = {
432 419 'id': payload_id,
433 420 'result': resp
434 421 }
435 422 log.debug('Serving data for method %s', method)
436 423 return resp
437 424
438 425 def vcs_stream_view(self, request):
439 426 payload, remote, method, args, kwargs = self._vcs_view_params(request)
440 427 # this method has a stream: marker we remove it here
441 428 method = method.split('stream:')[-1]
442 429 chunk_size = safe_int(payload.get('chunk_size')) or 4096
443 430
444 431 resp = getattr(remote, method)(*args, **kwargs)
445 432
446 433 def get_chunked_data(method_resp):
447 434 stream = io.BytesIO(method_resp)
448 435 while 1:
449 436 chunk = stream.read(chunk_size)
450 437 if not chunk:
451 438 break
452 439 yield chunk
453 440
454 441 response = Response(app_iter=get_chunked_data(resp))
455 442 response.content_type = 'application/octet-stream'
456 443
457 444 return response
458 445
459 446 def status_view(self, request):
460 447 import vcsserver
461 448 _platform_id = platform.uname()[1] or 'instance'
462 449
463 450 return {
464 451 "status": "OK",
465 452 "vcsserver_version": vcsserver.get_version(),
466 453 "platform": _platform_id,
467 454 "pid": os.getpid(),
468 455 }
469 456
470 457 def service_view(self, request):
471 458 import vcsserver
472 459
473 460 payload = msgpack.unpackb(request.body, use_list=True)
474 461 server_config, app_config = {}, {}
475 462
476 463 try:
477 464 path = self.global_config['__file__']
478 465 config = configparser.RawConfigParser()
479 466
480 467 config.read(path)
481 468
482 469 if config.has_section('server:main'):
483 470 server_config = dict(config.items('server:main'))
484 471 if config.has_section('app:main'):
485 472 app_config = dict(config.items('app:main'))
486 473
487 474 except Exception:
488 475 log.exception('Failed to read .ini file for display')
489 476
490 477 environ = list(os.environ.items())
491 478
492 479 resp = {
493 480 'id': payload.get('id'),
494 481 'result': dict(
495 482 version=vcsserver.get_version(),
496 483 config=server_config,
497 484 app_config=app_config,
498 485 environ=environ,
499 486 payload=payload,
500 487 )
501 488 }
502 489 return resp
503 490
504 491 def _msgpack_renderer_factory(self, info):
505 492
506 493 def _render(value, system):
507 494 bin_type = False
508 495 res = value.get('result')
509 496 if isinstance(res, BytesEnvelope):
510 497 log.debug('Result is wrapped in BytesEnvelope type')
511 498 bin_type = True
512 499 elif isinstance(res, BinaryEnvelope):
513 500 log.debug('Result is wrapped in BinaryEnvelope type')
514 501 value['result'] = res.val
515 502 bin_type = True
516 503
517 504 request = system.get('request')
518 505 if request is not None:
519 506 response = request.response
520 507 ct = response.content_type
521 508 if ct == response.default_content_type:
522 509 response.content_type = 'application/x-msgpack'
523 510 if bin_type:
524 511 response.content_type = 'application/x-msgpack-bin'
525 512
526 513 return msgpack.packb(value, use_bin_type=bin_type)
527 514 return _render
528 515
529 516 def set_env_from_config(self, environ, config):
530 517 dict_conf = {}
531 518 try:
532 519 for elem in config:
533 520 if elem[0] == 'rhodecode':
534 521 dict_conf = json.loads(elem[2])
535 522 break
536 523 except Exception:
537 524 log.exception('Failed to fetch SCM CONFIG')
538 525 return
539 526
540 527 username = dict_conf.get('username')
541 528 if username:
542 529 environ['REMOTE_USER'] = username
543 530 # mercurial specific, some extension api rely on this
544 531 environ['HGUSER'] = username
545 532
546 533 ip = dict_conf.get('ip')
547 534 if ip:
548 535 environ['REMOTE_HOST'] = ip
549 536
550 537 if _is_request_chunked(environ):
551 538 # set the compatibility flag for webob
552 539 environ['wsgi.input_terminated'] = True
553 540
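An illustrative shape for the repo config triples scanned above: (section, key, value) tuples where the 'rhodecode' section's value is the JSON blob carrying username/ip (values made up):

    config = [
        ('rhodecode', 'RC_SCM_DATA', '{"username": "admin", "ip": "10.0.0.1"}'),
    ]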
554 541 def hg_proxy(self):
555 542 @wsgiapp
556 543 def _hg_proxy(environ, start_response):
557 544 app = WsgiProxy(self.remote_wsgi.HgRemoteWsgi())
558 545 return app(environ, start_response)
559 546 return _hg_proxy
560 547
561 548 def git_proxy(self):
562 549 @wsgiapp
563 550 def _git_proxy(environ, start_response):
564 551 app = WsgiProxy(self.remote_wsgi.GitRemoteWsgi())
565 552 return app(environ, start_response)
566 553 return _git_proxy
567 554
568 555 def hg_stream(self):
569 556 if self._use_echo_app:
570 557 @wsgiapp
571 558 def _hg_stream(environ, start_response):
572 559 app = EchoApp('fake_path', 'fake_name', None)
573 560 return app(environ, start_response)
574 561 return _hg_stream
575 562 else:
576 563 @wsgiapp
577 564 def _hg_stream(environ, start_response):
578 565 log.debug('http-app: handling hg stream')
579 566 call_context = get_headers_call_context(environ)
580 567
581 568 repo_path = call_context['repo_path']
582 569 repo_name = call_context['repo_name']
583 570 config = call_context['repo_config']
584 571
585 572 app = scm_app.create_hg_wsgi_app(
586 573 repo_path, repo_name, config)
587 574
588 575 # Consistent path information for hgweb
589 576 environ['PATH_INFO'] = call_context['path_info']
590 577 environ['REPO_NAME'] = repo_name
591 578 self.set_env_from_config(environ, config)
592 579
593 580 log.debug('http-app: starting app handler '
594 581 'with %s and process request', app)
595 582 return app(environ, ResponseFilter(start_response))
596 583 return _hg_stream
597 584
598 585 def git_stream(self):
599 586 if self._use_echo_app:
600 587 @wsgiapp
601 588 def _git_stream(environ, start_response):
602 589 app = EchoApp('fake_path', 'fake_name', None)
603 590 return app(environ, start_response)
604 591 return _git_stream
605 592 else:
606 593 @wsgiapp
607 594 def _git_stream(environ, start_response):
608 595 log.debug('http-app: handling git stream')
609 596
610 597 call_context = get_headers_call_context(environ)
611 598
612 599 repo_path = call_context['repo_path']
613 600 repo_name = call_context['repo_name']
614 601 config = call_context['repo_config']
615 602
616 603 environ['PATH_INFO'] = call_context['path_info']
617 604 self.set_env_from_config(environ, config)
618 605
619 606 content_type = environ.get('CONTENT_TYPE', '')
620 607
621 608 path = environ['PATH_INFO']
622 609 is_lfs_request = GIT_LFS_CONTENT_TYPE in content_type
623 610 log.debug(
624 611 'LFS: Detecting if request `%s` is LFS server path based '
625 612 'on content type:`%s`, is_lfs:%s',
626 613 path, content_type, is_lfs_request)
627 614
628 615 if not is_lfs_request:
629 616 # fallback detection by path
630 617 if GIT_LFS_PROTO_PAT.match(path):
631 618 is_lfs_request = True
632 619 log.debug(
633 620 'LFS: fallback detection by path of: `%s`, is_lfs:%s',
634 621 path, is_lfs_request)
635 622
636 623 if is_lfs_request:
637 624 app = scm_app.create_git_lfs_wsgi_app(
638 625 repo_path, repo_name, config)
639 626 else:
640 627 app = scm_app.create_git_wsgi_app(
641 628 repo_path, repo_name, config)
642 629
643 630 log.debug('http-app: starting app handler '
644 631 'with %s, processing request', app)
645 632
646 633 return app(environ, start_response)
647 634
648 635 return _git_stream
649 636
650 637 def handle_vcs_exception(self, exception, request):
651 638 _vcs_kind = getattr(exception, '_vcs_kind', '')
652 639
653 640 if _vcs_kind == 'repo_locked':
654 641 headers_call_context = get_headers_call_context(request.environ)
655 642 status_code = safe_int(headers_call_context['locked_status_code'])
656 643
657 644 return HTTPRepoLocked(
658 645 title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
659 646
660 647 elif _vcs_kind == 'repo_branch_protected':
661 648 # Get custom repo-branch-protected status code if present.
662 649 return HTTPRepoBranchProtected(
663 650 title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
664 651
665 652 exc_info = request.exc_info
666 653 store_exception(id(exc_info), exc_info)
667 654
668 655 traceback_info = 'unavailable'
669 656 if request.exc_info:
670 657 traceback_info = format_exc(request.exc_info)
671 658
672 659 log.error(
673 660 'error occurred handling this request for path: %s, \n%s',
674 661 request.path, traceback_info)
675 662
676 663 statsd = request.registry.statsd
677 664 if statsd:
678 665 exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
679 666 statsd.incr('vcsserver_exception_total',
680 667 tags=[f"type:{exc_type}"])
681 668 raise exception
682 669
683 670
684 671 class ResponseFilter:
685 672
686 673 def __init__(self, start_response):
687 674 self._start_response = start_response
688 675
689 676 def __call__(self, status, response_headers, exc_info=None):
690 677 headers = tuple(
691 678 (h, v) for h, v in response_headers
692 679 if not wsgiref.util.is_hop_by_hop(h))
693 680 return self._start_response(status, headers, exc_info)
694 681
695 682
696 683 def sanitize_settings_and_apply_defaults(global_config, settings):
697 684 _global_settings_maker = SettingsMaker(global_config)
698 685 settings_maker = SettingsMaker(settings)
699 686
700 687 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
701 688
702 689 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
703 690 settings_maker.enable_logging(logging_conf)
704 691
705 692 # Default includes; a user may change these
706 693 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
707 694 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
708 695
709 696 settings_maker.make_setting('__file__', global_config.get('__file__'))
710 697
711 698 settings_maker.make_setting('pyramid.default_locale_name', 'en')
712 699 settings_maker.make_setting('locale', 'en_US.UTF-8')
713 700
714 701 settings_maker.make_setting(
715 702 'core.binary_dir', '/usr/local/bin/rhodecode_bin/vcs_bin',
716 703 default_when_empty=True, parser='string:noquote')
717 704
705 settings_maker.make_setting('vcs.svn.redis_conn', 'redis://redis:6379/0')
706
718 707 temp_store = tempfile.gettempdir()
719 708 default_cache_dir = os.path.join(temp_store, 'rc_cache')
720 709 # save the default cache dir and use it for all backends later.
721 710 default_cache_dir = settings_maker.make_setting(
722 711 'cache_dir',
723 712 default=default_cache_dir, default_when_empty=True,
724 713 parser='dir:ensured')
725 714
726 715 # exception store cache
727 716 settings_maker.make_setting(
728 717 'exception_tracker.store_path',
729 718 default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
730 719 parser='dir:ensured'
731 720 )
732 721
733 722 # repo_object cache defaults
734 723 settings_maker.make_setting(
735 724 'rc_cache.repo_object.backend',
736 725 default='dogpile.cache.rc.file_namespace',
737 726 parser='string')
738 727 settings_maker.make_setting(
739 728 'rc_cache.repo_object.expiration_time',
740 729 default=30 * 24 * 60 * 60, # 30 days
741 730 parser='int')
742 731 settings_maker.make_setting(
743 732 'rc_cache.repo_object.arguments.filename',
744 733 default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
745 734 parser='string')
746 735
747 736 # statsd
748 737 settings_maker.make_setting('statsd.enabled', False, parser='bool')
749 738 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
750 739 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
751 740 settings_maker.make_setting('statsd.statsd_prefix', '')
752 741 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
753 742
754 743 settings_maker.env_expand()
755 744
756 745
757 746 def main(global_config, **settings):
758 747 start_time = time.time()
759 748 log.info('Pyramid app config starting')
760 749
761 750 if MercurialFactory:
762 751 hgpatches.patch_largefiles_capabilities()
763 752 hgpatches.patch_subrepo_type_mapping()
764 753
765 754 # Fill in and sanitize the defaults & do ENV expansion
766 755 sanitize_settings_and_apply_defaults(global_config, settings)
767 756
768 757 # init and bootstrap StatsdClient
769 758 StatsdClient.setup(settings)
770 759
771 760 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
772 761 total_time = time.time() - start_time
773 762 log.info('Pyramid app created and configured in %.2fs', total_time)
774 763 return pyramid_app
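# A hedged client-side sketch of decoding the msgpack renderer output above;
# the payload below is illustrative, not a real vcsserver response.
import msgpack

body = msgpack.packb({'id': 1, 'result': b'raw-bytes'}, use_bin_type=True)
# an `application/x-msgpack-bin` content type signals use_bin_type=True
decoded = msgpack.unpackb(body, raw=False)
assert decoded['result'] == b'raw-bytes'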
@@ -1,243 +1,237 b''
1 1 '''
2 2 This library is provided to allow standard python logging
3 3 to output log data as JSON formatted strings
4 4 '''
5 5 import logging
6 import json
7 6 import re
8 7 from datetime import date, datetime, time, tzinfo, timedelta
9 8 import traceback
10 9 import importlib
11 10
12 11 from inspect import istraceback
13 12
14 13 from collections import OrderedDict
15 14
16
17 def _inject_req_id(record, *args, **kwargs):
18 return record
19
20
21 ExceptionAwareFormatter = logging.Formatter
22
15 from ...logging_formatter import _inject_req_id, ExceptionAwareFormatter
16 from ...ext_json import sjson as json
23 17
24 18 ZERO = timedelta(0)
25 19 HOUR = timedelta(hours=1)
26 20
27 21
28 22 class UTC(tzinfo):
29 23 """UTC"""
30 24
31 25 def utcoffset(self, dt):
32 26 return ZERO
33 27
34 28 def tzname(self, dt):
35 29 return "UTC"
36 30
37 31 def dst(self, dt):
38 32 return ZERO
39 33
40 34 utc = UTC()
41 35
42 36
43 37 # skip natural LogRecord attributes
44 38 # http://docs.python.org/library/logging.html#logrecord-attributes
45 39 RESERVED_ATTRS = (
46 40 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
47 41 'funcName', 'levelname', 'levelno', 'lineno', 'module',
48 42 'msecs', 'message', 'msg', 'name', 'pathname', 'process',
49 43 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
50 44
51 45
52 46 def merge_record_extra(record, target, reserved):
53 47 """
54 48 Merges extra attributes from LogRecord object into target dictionary
55 49
56 50 :param record: logging.LogRecord
57 51 :param target: dict to update
58 52 :param reserved: dict or list with reserved keys to skip
59 53 """
60 54 for key, value in record.__dict__.items():
61 55 # this allows having numeric keys
62 56 if (key not in reserved
63 57 and not (hasattr(key, "startswith")
64 58 and key.startswith('_'))):
65 59 target[key] = value
66 60 return target
67 61
68 62
69 63 class JsonEncoder(json.JSONEncoder):
70 64 """
71 65 A custom encoder extending the default JSONEncoder
72 66 """
73 67
74 68 def default(self, obj):
75 69 if isinstance(obj, (date, datetime, time)):
76 70 return self.format_datetime_obj(obj)
77 71
78 72 elif istraceback(obj):
79 73 return ''.join(traceback.format_tb(obj)).strip()
80 74
81 75 elif type(obj) == Exception \
82 76 or isinstance(obj, Exception) \
83 77 or type(obj) == type:
84 78 return str(obj)
85 79
86 80 try:
87 81 return super().default(obj)
88 82
89 83 except TypeError:
90 84 try:
91 85 return str(obj)
92 86
93 87 except Exception:
94 88 return None
95 89
96 90 def format_datetime_obj(self, obj):
97 91 return obj.isoformat()
98 92
99 93
100 94 class JsonFormatter(ExceptionAwareFormatter):
101 95 """
102 96 A custom formatter to format logging records as json strings.
103 97 Extra values will be formatted with str() if not supported by
104 98 the json default encoder
105 99 """
106 100
107 101 def __init__(self, *args, **kwargs):
108 102 """
109 103 :param json_default: a function for encoding non-standard objects
110 104 as outlined in http://docs.python.org/2/library/json.html
111 105 :param json_encoder: optional custom encoder
112 106 :param json_serializer: a :meth:`json.dumps`-compatible callable
113 107 that will be used to serialize the log record.
114 108 :param json_indent: an optional :meth:`json.dumps`-compatible numeric value
115 109 that will be used to customize the indent of the output json.
116 110 :param prefix: an optional string prefix added at the beginning of
117 111 the formatted string
118 112 :param json_indent: indent parameter for json.dumps
119 113 :param json_ensure_ascii: ensure_ascii parameter for json.dumps
120 114 :param reserved_attrs: an optional list of fields that will be skipped when
121 115 outputting json log record. Defaults to all log record attributes:
122 116 http://docs.python.org/library/logging.html#logrecord-attributes
123 117 :param timestamp: an optional string/boolean field to add a timestamp when
124 118 outputting the json log record. If string is passed, timestamp will be added
125 119 to log record using string as key. If True boolean is passed, timestamp key
126 120 will be "timestamp". Defaults to False/off.
127 121 """
128 122 self.json_default = self._str_to_fn(kwargs.pop("json_default", None))
129 123 self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None))
130 124 self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps))
131 125 self.json_indent = kwargs.pop("json_indent", None)
132 126 self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True)
133 127 self.prefix = kwargs.pop("prefix", "")
134 128 reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS)
135 129 self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs)))
136 130 self.timestamp = kwargs.pop("timestamp", True)
137 131
138 132 # super(JsonFormatter, self).__init__(*args, **kwargs)
139 133 logging.Formatter.__init__(self, *args, **kwargs)
140 134 if not self.json_encoder and not self.json_default:
141 135 self.json_encoder = JsonEncoder
142 136
143 137 self._required_fields = self.parse()
144 138 self._skip_fields = dict(list(zip(self._required_fields,
145 139 self._required_fields)))
146 140 self._skip_fields.update(self.reserved_attrs)
147 141
148 142 def _str_to_fn(self, fn_as_str):
149 143 """
150 144 If the argument is not a string, return whatever was passed in.
151 145 Parses a string such as package.module.function, imports the module
152 146 and returns the function.
153 147
154 148 :param fn_as_str: The string to parse. If not a string, return it.
155 149 """
156 150 if not isinstance(fn_as_str, str):
157 151 return fn_as_str
158 152
159 153 path, _, function = fn_as_str.rpartition('.')
160 154 module = importlib.import_module(path)
161 155 return getattr(module, function)
162 156
163 157 def parse(self):
164 158 """
165 159 Parses format string looking for substitutions
166 160
167 161 This method is responsible for returning a list of fields (as strings)
168 162 to include in all log messages.
169 163 """
170 164 standard_formatters = re.compile(r'\((.+?)\)', re.IGNORECASE)
171 165 return standard_formatters.findall(self._fmt)
172 166
173 167 def add_fields(self, log_record, record, message_dict):
174 168 """
175 169 Override this method to implement custom logic for adding fields.
176 170 """
177 171 for field in self._required_fields:
178 172 log_record[field] = record.__dict__.get(field)
179 173 log_record.update(message_dict)
180 174 merge_record_extra(record, log_record, reserved=self._skip_fields)
181 175
182 176 if self.timestamp:
183 177 key = self.timestamp if type(self.timestamp) == str else 'timestamp'
184 178 log_record[key] = datetime.fromtimestamp(record.created, tz=utc)
185 179
186 180 def process_log_record(self, log_record):
187 181 """
188 182 Override this method to implement custom logic
189 183 on the possibly ordered dictionary.
190 184 """
191 185 return log_record
192 186
193 187 def jsonify_log_record(self, log_record):
194 188 """Returns a json string of the log record."""
195 189 return self.json_serializer(log_record,
196 190 default=self.json_default,
197 191 cls=self.json_encoder,
198 192 indent=self.json_indent,
199 193 ensure_ascii=self.json_ensure_ascii)
200 194
201 195 def serialize_log_record(self, log_record):
202 196 """Returns the final representation of the log record."""
203 197 return "{}{}".format(self.prefix, self.jsonify_log_record(log_record))
204 198
205 199 def format(self, record):
206 200 """Formats a log record and serializes to json"""
207 201 message_dict = {}
208 202 # FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
209 203 # are always type of str. We shouldn't need to override that.
210 204 if isinstance(record.msg, dict):
211 205 message_dict = record.msg
212 206 record.message = None
213 207 else:
214 208 record.message = record.getMessage()
215 209 # only format time if needed
216 210 if "asctime" in self._required_fields:
217 211 record.asctime = self.formatTime(record, self.datefmt)
218 212
219 213 # Display formatted exception, but allow overriding it in the
220 214 # user-supplied dict.
221 215 if record.exc_info and not message_dict.get('exc_info'):
222 216 message_dict['exc_info'] = self.formatException(record.exc_info)
223 217 if not message_dict.get('exc_info') and record.exc_text:
224 218 message_dict['exc_info'] = record.exc_text
225 219 # Display formatted record of stack frames
226 220 # default format is a string returned from :func:`traceback.print_stack`
227 221 try:
228 222 if record.stack_info and not message_dict.get('stack_info'):
229 223 message_dict['stack_info'] = self.formatStack(record.stack_info)
230 224 except AttributeError:
231 225 # Python2.7 doesn't have stack_info.
232 226 pass
233 227
234 228 try:
235 229 log_record = OrderedDict()
236 230 except NameError:
237 231 log_record = {}
238 232
239 233 _inject_req_id(record, with_prefix=False)
240 234 self.add_fields(log_record, record, message_dict)
241 235 log_record = self.process_log_record(log_record)
242 236
243 237 return self.serialize_log_record(log_record)
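# A minimal wiring sketch for JsonFormatter above; the format string picks
# the fields that become JSON keys, and the logger name is illustrative.
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter('%(levelname)s %(name)s %(message)s'))
logger = logging.getLogger('vcsserver.demo')
logger.addHandler(handler)
logger.error('boom', extra={'request_id': 'req-1'})  # extras merge into the JSON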
@@ -1,394 +1,402 b''
1 1
2 2 import threading
3 3 import weakref
4 4 from base64 import b64encode
5 5 from logging import getLogger
6 6 from os import urandom
7 7 from typing import Union
8 8
9 9 from redis import StrictRedis
10 10
11 11 __version__ = '4.0.0'
12 12
13 13 loggers = {
14 14 k: getLogger("vcsserver." + ".".join((__name__, k)))
15 15 for k in [
16 16 "acquire",
17 17 "refresh.thread.start",
18 18 "refresh.thread.stop",
19 19 "refresh.thread.exit",
20 20 "refresh.start",
21 21 "refresh.shutdown",
22 22 "refresh.exit",
23 23 "release",
24 24 ]
25 25 }
26 26
27 27 text_type = str
28 28 binary_type = bytes
29 29
30 30
31 31 # Check if the id matches. If not, return an error code.
32 32 UNLOCK_SCRIPT = b"""
33 33 if redis.call("get", KEYS[1]) ~= ARGV[1] then
34 34 return 1
35 35 else
36 36 redis.call("del", KEYS[2])
37 37 redis.call("lpush", KEYS[2], 1)
38 38 redis.call("pexpire", KEYS[2], ARGV[2])
39 39 redis.call("del", KEYS[1])
40 40 return 0
41 41 end
42 42 """
43 43
44 44 # Covers both cases: the key doesn't exist, or it doesn't equal the lock's id
45 45 EXTEND_SCRIPT = b"""
46 46 if redis.call("get", KEYS[1]) ~= ARGV[1] then
47 47 return 1
48 48 elseif redis.call("ttl", KEYS[1]) < 0 then
49 49 return 2
50 50 else
51 51 redis.call("expire", KEYS[1], ARGV[2])
52 52 return 0
53 53 end
54 54 """
55 55
56 56 RESET_SCRIPT = b"""
57 57 redis.call('del', KEYS[2])
58 58 redis.call('lpush', KEYS[2], 1)
59 59 redis.call('pexpire', KEYS[2], ARGV[2])
60 60 return redis.call('del', KEYS[1])
61 61 """
62 62
63 63 RESET_ALL_SCRIPT = b"""
64 64 local locks = redis.call('keys', 'lock:*')
65 65 local signal
66 66 for _, lock in pairs(locks) do
67 67 signal = 'lock-signal:' .. string.sub(lock, 6)
68 68 redis.call('del', signal)
69 69 redis.call('lpush', signal, 1)
70 70 redis.call('expire', signal, 1)
71 71 redis.call('del', lock)
72 72 end
73 73 return #locks
74 74 """
75 75
76 76
77 77 class AlreadyAcquired(RuntimeError):
78 78 pass
79 79
80 80
81 81 class NotAcquired(RuntimeError):
82 82 pass
83 83
84 84
85 85 class AlreadyStarted(RuntimeError):
86 86 pass
87 87
88 88
89 89 class TimeoutNotUsable(RuntimeError):
90 90 pass
91 91
92 92
93 93 class InvalidTimeout(RuntimeError):
94 94 pass
95 95
96 96
97 97 class TimeoutTooLarge(RuntimeError):
98 98 pass
99 99
100 100
101 101 class NotExpirable(RuntimeError):
102 102 pass
103 103
104 104
105 105 class Lock:
106 106 """
107 107 A Lock context manager implemented via redis SETNX/BLPOP.
108 108 """
109 109
110 110 unlock_script = None
111 111 extend_script = None
112 112 reset_script = None
113 113 reset_all_script = None
114 blocking = None
114 115
115 116 _lock_renewal_interval: float
116 117 _lock_renewal_thread: Union[threading.Thread, None]
117 118
118 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
119 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000, blocking=True):
119 120 """
120 121 :param redis_client:
121 122 An instance of :class:`~StrictRedis`.
122 123 :param name:
123 124 The name (redis key) the lock should have.
124 125 :param expire:
125 126 The lock expiry time in seconds. If left at the default (None)
126 127 the lock will not expire.
127 128 :param id:
128 129 The ID (redis value) the lock should have. A random value is
129 130 generated when left at the default.
130 131
131 132 Note that if you specify this then the lock is marked as "held". Acquires
132 133 won't be possible.
133 134 :param auto_renewal:
134 135 If set to ``True``, Lock will automatically renew the lock so that it
135 136 doesn't expire for as long as the lock is held (acquire() called
136 137 or running in a context manager).
137 138
138 139 Implementation note: Renewal will happen using a daemon thread with
139 140 an interval of ``expire*2/3``. If wishing to use a different renewal
140 141 time, subclass Lock, call ``super().__init__()`` then set
141 142 ``self._lock_renewal_interval`` to your desired interval.
142 143 :param strict:
143 144 If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
144 145 :param signal_expire:
145 146 Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
147 :param blocking:
148 Boolean value specifying whether lock should be blocking or not.
149 Used in `__enter__` method.
146 150 """
147 151 if strict and not isinstance(redis_client, StrictRedis):
148 152 raise ValueError("redis_client must be instance of StrictRedis. "
149 153 "Use strict=False if you know what you're doing.")
150 154 if auto_renewal and expire is None:
151 155 raise ValueError("Expire may not be None when auto_renewal is set")
152 156
153 157 self._client = redis_client
154 158
155 159 if expire:
156 160 expire = int(expire)
157 161 if expire < 0:
158 162 raise ValueError("A negative expire is not acceptable.")
159 163 else:
160 164 expire = None
161 165 self._expire = expire
162 166
163 167 self._signal_expire = signal_expire
164 168 if id is None:
165 169 self._id = b64encode(urandom(18)).decode('ascii')
166 170 elif isinstance(id, binary_type):
167 171 try:
168 172 self._id = id.decode('ascii')
169 173 except UnicodeDecodeError:
170 174 self._id = b64encode(id).decode('ascii')
171 175 elif isinstance(id, text_type):
172 176 self._id = id
173 177 else:
174 178 raise TypeError(f"Incorrect type for `id`. Must be bytes/str not {type(id)}.")
175 179 self._name = 'lock:' + name
176 180 self._signal = 'lock-signal:' + name
177 181 self._lock_renewal_interval = (float(expire) * 2 / 3
178 182 if auto_renewal
179 183 else None)
180 184 self._lock_renewal_thread = None
181 185
186 self.blocking = blocking
187
182 188 self.register_scripts(redis_client)
183 189
184 190 @classmethod
185 191 def register_scripts(cls, redis_client):
186 192 global reset_all_script
187 193 if reset_all_script is None:
188 194 cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
189 195 cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
190 196 cls.reset_script = redis_client.register_script(RESET_SCRIPT)
191 197 cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
192 198 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
193 199
194 200 @property
195 201 def _held(self):
196 202 return self.id == self.get_owner_id()
197 203
198 204 def reset(self):
199 205 """
200 206 Forcibly deletes the lock. Use this with care.
201 207 """
202 208 self.reset_script(client=self._client, keys=(self._name, self._signal), args=(self.id, self._signal_expire))
203 209
204 210 @property
205 211 def id(self):
206 212 return self._id
207 213
208 214 def get_owner_id(self):
209 215 owner_id = self._client.get(self._name)
210 216 if isinstance(owner_id, binary_type):
211 217 owner_id = owner_id.decode('ascii', 'replace')
212 218 return owner_id
213 219
214 220 def acquire(self, blocking=True, timeout=None):
215 221 """
216 222 :param blocking:
217 223 Boolean value specifying whether lock should be blocking or not.
218 224 :param timeout:
219 225 An integer value specifying the maximum number of seconds to block.
220 226 """
221 227 logger = loggers["acquire"]
222 228
223 229 logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
224 230
225 231 if self._held:
226 232 owner_id = self.get_owner_id()
227 233 raise AlreadyAcquired("Already acquired from this Lock instance. Lock id: {}".format(owner_id))
228 234
229 235 if not blocking and timeout is not None:
230 236 raise TimeoutNotUsable("Timeout cannot be used if blocking=False")
231 237
232 238 if timeout:
233 239 timeout = int(timeout)
234 240 if timeout < 0:
235 241 raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0")
236 242
237 243 if self._expire and not self._lock_renewal_interval and timeout > self._expire:
238 244 raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})")
239 245
240 246 busy = True
241 247 blpop_timeout = timeout or self._expire or 0
242 248 timed_out = False
243 249 while busy:
244 250 busy = not self._client.set(self._name, self._id, nx=True, ex=self._expire)
245 251 if busy:
246 252 if timed_out:
247 253 return False
248 254 elif blocking:
249 255 timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
250 256 else:
251 257 logger.warning("Failed to acquire Lock(%r).", self._name)
252 258 return False
253 259
254 260 logger.debug("Acquired Lock(%r).", self._name)
255 261 if self._lock_renewal_interval is not None:
256 262 self._start_lock_renewer()
257 263 return True
258 264
259 265 def extend(self, expire=None):
260 266 """
261 267 Extends expiration time of the lock.
262 268
263 269 :param expire:
264 270 New expiration time. If ``None`` - `expire` provided during
265 271 lock initialization will be taken.
266 272 """
267 273 if expire:
268 274 expire = int(expire)
269 275 if expire < 0:
270 276 raise ValueError("A negative expire is not acceptable.")
271 277 elif self._expire is not None:
272 278 expire = self._expire
273 279 else:
274 280 raise TypeError(
275 281 "To extend a lock 'expire' must be provided as an "
276 282 "argument to extend() method or at initialization time."
277 283 )
278 284
279 285 error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
280 286 if error == 1:
281 287 raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.")
282 288 elif error == 2:
283 289 raise NotExpirable(f"Lock {self._name} has no assigned expiration time")
284 290 elif error:
285 291 raise RuntimeError(f"Unsupported error code {error} from EXTEND script")
286 292
287 293 @staticmethod
288 294 def _lock_renewer(name, lockref, interval, stop):
289 295 """
290 296 Renew the lock key in redis every `interval` seconds for as long
291 297 as the `stop` event is not set.
292 298 """
293 299 while not stop.wait(timeout=interval):
294 300 loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name)
295 301 lock: "Lock" = lockref()
296 302 if lock is None:
297 303 loggers["refresh.thread.stop"].debug(
298 304 "Stopping loop because Lock(%r) was garbage collected.", name
299 305 )
300 306 break
301 307 lock.extend(expire=lock._expire)
302 308 del lock
303 309 loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name)
304 310
305 311 def _start_lock_renewer(self):
306 312 """
307 313 Starts the lock refresher thread.
308 314 """
309 315 if self._lock_renewal_thread is not None:
310 316 raise AlreadyStarted("Lock refresh thread already started")
311 317
312 318 loggers["refresh.start"].debug(
313 319 "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.",
314 320 self._name, self._lock_renewal_interval
315 321 )
316 322 self._lock_renewal_stop = threading.Event()
317 323 self._lock_renewal_thread = threading.Thread(
318 324 group=None,
319 325 target=self._lock_renewer,
320 326 kwargs={
321 327 'name': self._name,
322 328 'lockref': weakref.ref(self),
323 329 'interval': self._lock_renewal_interval,
324 330 'stop': self._lock_renewal_stop,
325 331 },
326 332 )
327 333 self._lock_renewal_thread.daemon = True
328 334 self._lock_renewal_thread.start()
329 335
330 336 def _stop_lock_renewer(self):
331 337 """
332 338 Stop the lock renewer.
333 339
334 340 This signals the renewal thread and waits for its exit.
335 341 """
336 342 if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
337 343 return
338 344 loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name)
339 345 self._lock_renewal_stop.set()
340 346 self._lock_renewal_thread.join()
341 347 self._lock_renewal_thread = None
342 348 loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
343 349
344 350 def __enter__(self):
345 acquired = self.acquire(blocking=True)
351 acquired = self.acquire(blocking=self.blocking)
346 352 if not acquired:
347 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
353 if self.blocking:
354 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
355 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
348 356 return self
349 357
350 358 def __exit__(self, exc_type=None, exc_value=None, traceback=None):
351 359 self.release()
352 360
353 361 def release(self):
354 362 """Releases the lock, that was acquired with the same object.
355 363
356 364 .. note::
357 365
358 366 If you want to release a lock that you acquired in a different place you have two choices:
359 367
360 368 * Use ``Lock("name", id=id_from_other_place).release()``
361 369 * Use ``Lock("name").reset()``
362 370 """
363 371 if self._lock_renewal_thread is not None:
364 372 self._stop_lock_renewer()
365 373 loggers["release"].debug("Releasing Lock(%r).", self._name)
366 374 error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
367 375 if error == 1:
368 376 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
369 377 elif error:
370 378 raise RuntimeError(f"Unsupported error code {error} from UNLOCK script.")
371 379
372 380 def locked(self):
373 381 """
374 382 Return true if the lock is acquired.
375 383
376 384 Checks that lock with same name already exists. This method returns true, even if
377 385 lock have another id.
378 386 """
379 387 return self._client.exists(self._name) == 1
380 388
381 389
382 390 reset_all_script = None
383 391
384 392
385 393 def reset_all(redis_client):
386 394 """
387 395 Forcibly deletes all locks if any remain (e.g. left over after a crash). Use this with care.
388 396
389 397 :param redis_client:
390 398 An instance of :class:`~StrictRedis`.
391 399 """
392 400 Lock.register_scripts(redis_client)
393 401
394 402 reset_all_script(client=redis_client) # noqa
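# A minimal usage sketch for Lock, assuming a Redis server reachable on
# localhost:6379 (connection details and lock name are illustrative).
client = StrictRedis(host='localhost', port=6379)
lock = Lock(client, 'repo-archive', expire=60, auto_renewal=True, blocking=False)
try:
    with lock:  # __enter__ uses self.blocking, raising instead of waiting
        pass    # critical section; the renewal thread keeps the key alive
except NotAcquired:
    pass        # another process holds 'lock:repo-archive'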
@@ -1,50 +1,51 b''
1 1 import logging
2 2
3 3 from .stream import TCPStatsClient, UnixSocketStatsClient # noqa
4 4 from .udp import StatsClient # noqa
5 5
6 6 HOST = 'localhost'
7 7 PORT = 8125
8 8 IPV6 = False
9 9 PREFIX = None
10 10 MAXUDPSIZE = 512
11 11
12 12 log = logging.getLogger('rhodecode.statsd')
13 13
14 14
15 15 def statsd_config(config, prefix='statsd.'):
16 16 _config = {}
17 for key in config.keys():
17 for key in list(config.keys()):
18 18 if key.startswith(prefix):
19 19 _config[key[len(prefix):]] = config[key]
20 20 return _config
21 21
22 22
23 23 def client_from_config(configuration, prefix='statsd.', **kwargs):
24 24 from pyramid.settings import asbool
25 25
26 26 _config = statsd_config(configuration, prefix)
27 statsd_flag = _config.get('enabled')
27 28 statsd_enabled = asbool(_config.pop('enabled', False))
28 29 if not statsd_enabled:
29 log.debug('statsd client not enabled by statsd.enabled = flag, skipping...')
30 log.debug('statsd client not enabled by statsd.enabled = %s flag, skipping...', statsd_flag)
30 31 return
31 32
32 33 host = _config.pop('statsd_host', HOST)
33 34 port = _config.pop('statsd_port', PORT)
34 35 prefix = _config.pop('statsd_prefix', PREFIX)
35 36 maxudpsize = _config.pop('statsd_maxudpsize', MAXUDPSIZE)
36 37 ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
37 38 log.debug('configured statsd client %s:%s', host, port)
38 39
39 40 try:
40 41 client = StatsClient(
41 42 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
42 43 except Exception:
43 44 log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd')
44 45 client = None
45 46
46 47 return client
47 48
48 49
49 50 def get_statsd_client(request):
50 51 return client_from_config(request.registry.settings)
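# A hedged sketch of building a client from flat settings; the keys mirror
# the `statsd.` prefix handled above, and the values are illustrative.
settings = {
    'statsd.enabled': 'true',
    'statsd.statsd_host': '127.0.0.1',
    'statsd.statsd_port': 8125,
    'statsd.statsd_prefix': 'vcsserver',
}
client = client_from_config(settings)
if client:
    client.incr('vcsserver_startup_total')  # metric name is illustrative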
@@ -1,40 +1,58 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18 import os
19 import vcsserver
20 import vcsserver.settings
19 21
20 22
21 23 def get_config(ini_path, **kwargs):
22 24 import configparser
23 25 parser = configparser.ConfigParser(**kwargs)
24 26 parser.read(ini_path)
25 27 return parser
26 28
27 29
28 30 def get_app_config_lightweight(ini_path):
29 31 parser = get_config(ini_path)
30 32 parser.set('app:main', 'here', os.getcwd())
31 33 parser.set('app:main', '__file__', ini_path)
32 34 return dict(parser.items('app:main'))
33 35
34 36
35 37 def get_app_config(ini_path):
36 38 """
37 39 This loads the app context and provides a heavyweight initialization of config
38 40 """
39 41 from paste.deploy.loadwsgi import appconfig
40 42 return appconfig(f'config:{ini_path}', relative_to=os.getcwd())
43
44
45 def configure_and_store_settings(global_config, app_settings):
46 """
47 Configure the settings module.
48 """
49 settings_merged = global_config.copy()
50 settings_merged.update(app_settings)
51
52 binary_dir = app_settings['core.binary_dir']
53
54 vcsserver.settings.BINARY_DIR = binary_dir
55
56 # Store the settings to make them available to other modules.
57 vcsserver.PYRAMID_SETTINGS = settings_merged
58 vcsserver.CONFIG = settings_merged
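# A hedged usage sketch of the helpers above; the ini path is illustrative
# and assumes the file defines core.binary_dir in its [app:main] section.
ini_path = '/etc/rhodecode/vcsserver.ini'
app_settings = get_app_config_lightweight(ini_path)
configure_and_store_settings({'__file__': ini_path}, app_settings)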
@@ -1,2 +1,14 b''
1 # use orjson by default
2 import orjson as json
1 import json as stdlib_json
2
3 try:
4 # we keep simplejson for its extra dump functionality (e.g. indent),
5 # which orjson doesn't support
6 import simplejson as sjson
7 except ImportError:
8 sjson = stdlib_json
9
10 try:
11 import orjson
12 import orjson as json
13 except ImportError:
14 json = stdlib_json
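# Usage sketch: callers import the selected backend through this module.
# Note that orjson.dumps returns bytes while the stdlib returns str, so
# downstream code should not assume one or the other when orjson is optional.
payload = json.dumps({'ok': True})            # bytes with orjson, str otherwise
pretty = sjson.dumps({'ok': True}, indent=2)  # simplejson keeps indent support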
@@ -1,53 +1,63 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import sys
20 20 import logging
21 21
22 22
23 23 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38))
24 24
25 25 # Sequences
26 26 RESET_SEQ = "\033[0m"
27 27 COLOR_SEQ = "\033[0;%dm"
28 28 BOLD_SEQ = "\033[1m"
29 29
30 30 COLORS = {
31 31 'CRITICAL': MAGENTA,
32 32 'ERROR': RED,
33 33 'WARNING': CYAN,
34 34 'INFO': GREEN,
35 35 'DEBUG': BLUE,
36 36 'SQL': YELLOW
37 37 }
38 38
39 39
40 def _inject_req_id(record, *args, **kwargs):
41 return record
42
43
44 class ExceptionAwareFormatter(logging.Formatter):
45 pass
46
47
40 48 class ColorFormatter(logging.Formatter):
41 49
42 50 def format(self, record):
43 51 """
44 Change record's levelname to use with COLORS enum
52 Changes record's levelname to use with COLORS enum
45 53 """
46 54 def_record = super().format(record)
47 55
48 56 levelname = record.levelname
49 57 start = COLOR_SEQ % (COLORS[levelname])
50 58 end = RESET_SEQ
51 59
52 60 colored_record = ''.join([start, def_record, end])
53 61 return colored_record
62
63
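# A minimal wiring sketch for ColorFormatter; the format string is illustrative.
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(ColorFormatter('%(asctime)s %(levelname)s %(message)s'))
root = logging.getLogger()
root.addHandler(handler)
root.setLevel(logging.DEBUG)
root.info('colored output')  # wrapped in the GREEN escape sequence for INFO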
@@ -1,63 +1,63 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import logging
20 20
21 21 from repoze.lru import LRUCache
22 22
23 from vcsserver.str_utils import safe_str
23 from vcsserver.lib.str_utils import safe_str
24 24
25 25 log = logging.getLogger(__name__)
26 26
27 27
28 28 class LRUDict(LRUCache):
29 29 """
30 30 Wrapper to provide partial dict access
31 31 """
32 32
33 33 def __setitem__(self, key, value):
34 34 return self.put(key, value)
35 35
36 36 def __getitem__(self, key):
37 37 return self.get(key)
38 38
39 39 def __contains__(self, key):
40 40 return bool(self.get(key))
41 41
42 42 def __delitem__(self, key):
43 43 del self.data[key]
44 44
45 45 def keys(self):
46 46 return list(self.data.keys())
47 47
48 48
49 49 class LRUDictDebug(LRUDict):
50 50 """
51 51 Wrapper to provide some debug options
52 52 """
53 53 def _report_keys(self):
54 54 elems_cnt = f'{len(list(self.keys()))}/{self.size}'
55 55 # trick for pformat to print it more nicely
56 56 fmt = '\n'
57 57 for cnt, elem in enumerate(self.keys()):
58 58 fmt += f'{cnt+1} - {safe_str(elem)}\n'
59 59 log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
60 60
61 61 def __getitem__(self, key):
62 62 self._report_keys()
63 63 return self.get(key)
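# Usage sketch: LRUDict is a bounded dict backed by repoze.lru.
cache = LRUDict(2)   # keep at most 2 entries
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3       # evicts the least recently used entry
assert cache['a'] is None   # misses return None instead of raising
# note: `key in cache` goes through get(), so falsy values also read as absent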
@@ -1,303 +1,303 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 #import errno
19 19 import fcntl
20 20 import functools
21 21 import logging
22 22 import os
23 23 import pickle
24 24 #import time
25 25
26 26 #import gevent
27 27 import msgpack
28 28 import redis
29 29
30 30 flock_org = fcntl.flock
31 31 from typing import Union
32 32
33 33 from dogpile.cache.api import Deserializer, Serializer
34 34 from dogpile.cache.backends import file as file_backend
35 35 from dogpile.cache.backends import memory as memory_backend
36 36 from dogpile.cache.backends import redis as redis_backend
37 37 from dogpile.cache.backends.file import FileLock
38 38 from dogpile.cache.util import memoized_property
39 39
40 40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
41 from vcsserver.str_utils import safe_bytes, safe_str
42 from vcsserver.type_utils import str2bool
41 from vcsserver.lib.str_utils import safe_bytes, safe_str
42 from vcsserver.lib.type_utils import str2bool
43 43
44 44 _default_max_size = 1024
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48
49 49 class LRUMemoryBackend(memory_backend.MemoryBackend):
50 50 key_prefix = 'lru_mem_backend'
51 51 pickle_values = False
52 52
53 53 def __init__(self, arguments):
54 54 self.max_size = arguments.pop('max_size', _default_max_size)
55 55
56 56 LRUDictClass = LRUDict
57 57 if arguments.pop('log_key_count', None):
58 58 LRUDictClass = LRUDictDebug
59 59
60 60 arguments['cache_dict'] = LRUDictClass(self.max_size)
61 61 super().__init__(arguments)
62 62
63 63 def __repr__(self):
64 64 return f'{self.__class__}(maxsize=`{self.max_size}`)'
65 65
66 66 def __str__(self):
67 67 return self.__repr__()
68 68
69 69 def delete(self, key):
70 70 try:
71 71 del self._cache[key]
72 72 except KeyError:
73 73 # we don't care if the key isn't there at deletion
74 74 pass
75 75
76 76 def list_keys(self, prefix):
77 77 return list(self._cache.keys())
78 78
79 79 def delete_multi(self, keys):
80 80 for key in keys:
81 81 self.delete(key)
82 82
83 83 def delete_multi_by_prefix(self, prefix):
84 84 cache_keys = self.list_keys(prefix=prefix)
85 85 num_affected_keys = len(cache_keys)
86 86 if num_affected_keys:
87 87 self.delete_multi(cache_keys)
88 88 return num_affected_keys
89 89
90 90
91 91 class PickleSerializer:
92 92 serializer: None | Serializer = staticmethod( # type: ignore
93 93 functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
94 94 )
95 95 deserializer: None | Deserializer = staticmethod( # type: ignore
96 96 functools.partial(pickle.loads)
97 97 )
98 98
99 99
100 100 class MsgPackSerializer:
101 101 serializer: None | Serializer = staticmethod( # type: ignore
102 102 msgpack.packb
103 103 )
104 104 deserializer: None | Deserializer = staticmethod( # type: ignore
105 105 functools.partial(msgpack.unpackb, use_list=False)
106 106 )
107 107
108 108
109 109 class CustomLockFactory(FileLock):
110 110
111 111 pass
112 112
113 113
114 114 class FileNamespaceBackend(PickleSerializer, file_backend.DBMBackend):
115 115 key_prefix = 'file_backend'
116 116
117 117 def __init__(self, arguments):
118 118 arguments['lock_factory'] = CustomLockFactory
119 119 db_file = arguments.get('filename')
120 120
121 121 log.debug('initializing cache-backend=%s db in %s', self.__class__.__name__, db_file)
122 122 db_file_dir = os.path.dirname(db_file)
123 123 if not os.path.isdir(db_file_dir):
124 124 os.makedirs(db_file_dir)
125 125
126 126 try:
127 127 super().__init__(arguments)
128 128 except Exception:
129 129 log.exception('Failed to initialize db at: %s', db_file)
130 130 raise
131 131
132 132 def __repr__(self):
133 133 return f'{self.__class__}(file=`{self.filename}`)'
134 134
135 135 def __str__(self):
136 136 return self.__repr__()
137 137
138 138 def _get_keys_pattern(self, prefix: bytes = b''):
139 139 return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
140 140
141 141 def list_keys(self, prefix: bytes = b''):
142 142 prefix = self._get_keys_pattern(prefix)
143 143
144 144 def cond(dbm_key: bytes):
145 145 if not prefix:
146 146 return True
147 147
148 148 if dbm_key.startswith(prefix):
149 149 return True
150 150 return False
151 151
152 152 with self._dbm_file(True) as dbm:
153 153 try:
154 154 return list(filter(cond, dbm.keys()))
155 155 except Exception:
156 156 log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
157 157 raise
158 158
159 159 def delete_multi_by_prefix(self, prefix):
160 160 cache_keys = self.list_keys(prefix=prefix)
161 161 num_affected_keys = len(cache_keys)
162 162 if num_affected_keys:
163 163 self.delete_multi(cache_keys)
164 164 return num_affected_keys
165 165
166 166 def get_store(self):
167 167 return self.filename
168 168
169 169
170 170 class BaseRedisBackend(redis_backend.RedisBackend):
171 171 key_prefix = ''
172 172
173 173 def __init__(self, arguments):
174 174 self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
175 175 super().__init__(arguments)
176 176
177 177 self._lock_timeout = self.lock_timeout
178 178 self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
179 179
180 180 if self._lock_auto_renewal and not self._lock_timeout:
181 181 # set default timeout for auto_renewal
182 182 self._lock_timeout = 30
183 183
184 184 def __repr__(self):
185 185 return f'{self.__class__}(conn=`{self.db_conn}`)'
186 186
187 187 def __str__(self):
188 188 return self.__repr__()
189 189
190 190 def _create_client(self):
191 191 args = {}
192 192
193 193 if self.url is not None:
194 194 args.update(url=self.url)
195 195
196 196 else:
197 197 args.update(
198 198 host=self.host, password=self.password,
199 199 port=self.port, db=self.db
200 200 )
201 201
202 202 connection_pool = redis.ConnectionPool(**args)
203 203 self.writer_client = redis.StrictRedis(
204 204 connection_pool=connection_pool
205 205 )
206 206 self.reader_client = self.writer_client
207 207
208 208 def _get_keys_pattern(self, prefix: bytes = b''):
209 209 return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
210 210
211 211 def list_keys(self, prefix: bytes = b''):
212 212 prefix = self._get_keys_pattern(prefix)
213 213 return self.reader_client.keys(prefix)
214 214
215 215 def delete_multi_by_prefix(self, prefix, use_lua=False):
216 216 if use_lua:
217 217 # highly efficient Lua script to delete ALL keys by prefix...
218 218 lua = """local keys = redis.call('keys', ARGV[1])
219 219 for i=1,#keys,5000 do
220 220 redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
221 221 end
222 222 return #keys"""
223 223 num_affected_keys = self.writer_client.eval(
224 224 lua,
225 225 0,
226 226 f"{prefix}*")
227 227 else:
228 228 cache_keys = self.list_keys(prefix=prefix)
229 229 num_affected_keys = len(cache_keys)
230 230 if num_affected_keys:
231 231 self.delete_multi(cache_keys)
232 232 return num_affected_keys
233 233
234 234 def get_store(self):
235 235 return self.reader_client.connection_pool
236 236
237 237 def get_mutex(self, key):
238 238 if self.distributed_lock:
239 239 lock_key = f'_lock_{safe_str(key)}'
240 240 return get_mutex_lock(
241 241 self.writer_client, lock_key,
242 242 self._lock_timeout,
243 243 auto_renewal=self._lock_auto_renewal
244 244 )
245 245 else:
246 246 return None
247 247
248 248
249 249 class RedisPickleBackend(PickleSerializer, BaseRedisBackend):
250 250 key_prefix = 'redis_pickle_backend'
251 251 pass
252 252
253 253
254 254 class RedisMsgPackBackend(MsgPackSerializer, BaseRedisBackend):
255 255 key_prefix = 'redis_msgpack_backend'
256 256 pass
257 257
258 258
259 259 def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
260 260 from vcsserver.lib._vendor import redis_lock
261 261
262 262 class _RedisLockWrapper:
263 263 """LockWrapper for redis_lock"""
264 264
265 265 @classmethod
266 266 def get_lock(cls):
267 267 return redis_lock.Lock(
268 268 redis_client=client,
269 269 name=lock_key,
270 270 expire=lock_timeout,
271 271 auto_renewal=auto_renewal,
272 272 strict=True,
273 273 )
274 274
275 275 def __repr__(self):
276 276 return f"{self.__class__.__name__}:{lock_key}"
277 277
278 278 def __str__(self):
279 279 return f"{self.__class__.__name__}:{lock_key}"
280 280
281 281 def __init__(self):
282 282 self.lock = self.get_lock()
283 283 self.lock_key = lock_key
284 284
285 285 def acquire(self, wait=True):
286 286 log.debug('Trying to acquire Redis lock for key %s', self.lock_key)
287 287 try:
288 288 acquired = self.lock.acquire(wait)
289 289 log.debug('Got lock for key %s, %s', self.lock_key, acquired)
290 290 return acquired
291 291 except redis_lock.AlreadyAcquired:
292 292 return False
293 293 except redis_lock.AlreadyStarted:
294 294 # refresh thread exists, but it also means we acquired the lock
295 295 return True
296 296
297 297 def release(self):
298 298 try:
299 299 self.lock.release()
300 300 except redis_lock.NotAcquired:
301 301 pass
302 302
303 303 return _RedisLockWrapper()
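# A minimal sketch of the wrapper above, assuming a local Redis instance;
# the lock key is illustrative.
client = redis.StrictRedis(host='localhost', port=6379)
mutex = get_mutex_lock(client, 'repo_cache_key', lock_timeout=30, auto_renewal=True)
if mutex.acquire(wait=False):
    try:
        pass  # guarded work
    finally:
        mutex.release()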
@@ -1,245 +1,245 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import functools
19 19 import logging
20 20 import os
21 21 import threading
22 22 import time
23 23
24 24 import decorator
25 25 from dogpile.cache import CacheRegion
26 26
27 27
28 28 from vcsserver.utils import sha1
29 from vcsserver.str_utils import safe_bytes
30 from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
29 from vcsserver.lib.str_utils import safe_bytes
30 from vcsserver.lib.type_utils import str2bool # noqa :required by imports from .utils
31 31
32 32 from . import region_meta
33 33
34 34 log = logging.getLogger(__name__)
35 35
36 36
37 37 class RhodeCodeCacheRegion(CacheRegion):
38 38
39 39 def __repr__(self):
40 40 return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
41 41
42 42 def conditional_cache_on_arguments(
43 43 self, namespace=None,
44 44 expiration_time=None,
45 45 should_cache_fn=None,
46 46 to_str=str,
47 47 function_key_generator=None,
48 48 condition=True):
49 49 """
50 50 Custom conditional decorator that will not touch any dogpile internals if
51 51 the condition isn't met. This works a bit differently from should_cache_fn,
52 52 and it's faster in cases where we never want to compute cached values.
53 53 """
54 54 expiration_time_is_callable = callable(expiration_time)
55 55 if not namespace:
56 56 namespace = getattr(self, '_default_namespace', None)
57 57
58 58 if function_key_generator is None:
59 59 function_key_generator = self.function_key_generator
60 60
61 61 def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
62 62
63 63 if not condition:
64 64 log.debug('Calling un-cached method:%s', user_func.__name__)
65 65 start = time.time()
66 66 result = user_func(*arg, **kw)
67 67 total = time.time() - start
68 68 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
69 69 return result
70 70
71 71 key = func_key_generator(*arg, **kw)
72 72
73 73 timeout = expiration_time() if expiration_time_is_callable \
74 74 else expiration_time
75 75
76 76 log.debug('Calling cached method:`%s`', user_func.__name__)
77 77 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
78 78
79 79 def cache_decorator(user_func):
80 80 if to_str is str:
81 81 # backwards compatible
82 82 key_generator = function_key_generator(namespace, user_func)
83 83 else:
84 84 key_generator = function_key_generator(namespace, user_func, to_str=to_str)
85 85
86 86 def refresh(*arg, **kw):
87 87 """
88 88 Like invalidate, but regenerates the value instead
89 89 """
90 90 key = key_generator(*arg, **kw)
91 91 value = user_func(*arg, **kw)
92 92 self.set(key, value)
93 93 return value
94 94
95 95 def invalidate(*arg, **kw):
96 96 key = key_generator(*arg, **kw)
97 97 self.delete(key)
98 98
99 99 def set_(value, *arg, **kw):
100 100 key = key_generator(*arg, **kw)
101 101 self.set(key, value)
102 102
103 103 def get(*arg, **kw):
104 104 key = key_generator(*arg, **kw)
105 105 return self.get(key)
106 106
107 107 user_func.set = set_
108 108 user_func.invalidate = invalidate
109 109 user_func.get = get
110 110 user_func.refresh = refresh
111 111 user_func.key_generator = key_generator
112 112 user_func.original = user_func
113 113
114 114 # Use `decorate` to preserve the signature of :param:`user_func`.
115 115 return decorator.decorate(user_func, functools.partial(
116 116 get_or_create_for_user_func, key_generator))
117 117
118 118 return cache_decorator
119 119
120 120
121 121 def make_region(*arg, **kw):
122 122 return RhodeCodeCacheRegion(*arg, **kw)
123 123
124 124
125 125 def get_default_cache_settings(settings, prefixes=None):
126 126 prefixes = prefixes or []
127 127 cache_settings = {}
128 128 for key in settings.keys():
129 129 for prefix in prefixes:
130 130 if key.startswith(prefix):
131 131 name = key.split(prefix)[1].strip()
132 132 val = settings[key]
133 133 if isinstance(val, str):
134 134 val = val.strip()
135 135 cache_settings[name] = val
136 136 return cache_settings
137 137
138 138
139 139 def compute_key_from_params(*args):
140 140 """
141 141 Helper to compute key from given params to be used in cache manager
142 142 """
143 143 return sha1(safe_bytes("_".join(map(str, args))))
144 144
145 145
146 146 def custom_key_generator(backend, namespace, fn):
147 147 func_name = fn.__name__
148 148
149 149 def generate_key(*args):
150 150 backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
151 151 namespace_pref = namespace or 'default_namespace'
152 152 arg_key = compute_key_from_params(*args)
153 153 final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
154 154
155 155 return final_key
156 156
157 157 return generate_key
158 158
159 159
160 160 def backend_key_generator(backend):
161 161 """
162 162 Special wrapper that also sends over the backend to the key generator
163 163 """
164 164 def wrapper(namespace, fn):
165 165 return custom_key_generator(backend, namespace, fn)
166 166 return wrapper
167 167
168 168
169 169 def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
170 170 from .backends import FileNamespaceBackend
171 171 from . import async_creation_runner
172 172
173 173 region_obj = region_meta.dogpile_cache_regions.get(region_name)
174 174 if not region_obj:
175 175 reg_keys = list(region_meta.dogpile_cache_regions.keys())
176 176 raise OSError(f'Region `{region_name}` not in configured regions: {reg_keys}.')
177 177
178 178 region_uid_name = f'{region_name}:{region_namespace}'
179 179
180 180 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
181 181 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
182 182 if not region_namespace:
183 183 raise ValueError(f'{FileNamespaceBackend} requires the region_namespace param to be specified')
184 184
185 185 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
186 186 if region_exist:
187 187 log.debug('Using already configured region: %s', region_namespace)
188 188 return region_exist
189 189
190 190 expiration_time = region_obj.expiration_time
191 191
192 192 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
193 193 namespace_cache_dir = cache_dir
194 194
195 195 # we default the namespace_cache_dir to our default cache dir.
196 196 # however, if this backend is configured with filename= param, we prioritize that
197 197 # so all caches within that particular region, even namespaced ones, end up in the same path
198 198 if region_obj.actual_backend.filename:
199 199 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
200 200
201 201 if not os.path.isdir(namespace_cache_dir):
202 202 os.makedirs(namespace_cache_dir)
203 203 new_region = make_region(
204 204 name=region_uid_name,
205 205 function_key_generator=backend_key_generator(region_obj.actual_backend)
206 206 )
207 207
208 208 namespace_filename = os.path.join(
209 209 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
210 210 # special type that allows 1db per namespace
211 211 new_region.configure(
212 212 backend='dogpile.cache.rc.file_namespace',
213 213 expiration_time=expiration_time,
214 214 arguments={"filename": namespace_filename}
215 215 )
216 216
217 217 # create and save in region caches
218 218 log.debug('configuring new region: %s', region_uid_name)
219 219 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
220 220
221 221 region_obj._default_namespace = region_namespace
222 222 if use_async_runner:
223 223 region_obj.async_creation_runner = async_creation_runner
224 224 return region_obj
225 225
226 226
227 227 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
228 228 from . import CLEAR_DELETE, CLEAR_INVALIDATE
229 229
230 230 if not isinstance(cache_region, RhodeCodeCacheRegion):
231 231 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
232 232 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
233 233 cache_region, cache_namespace_uid, method)
234 234
235 235 num_affected_keys = 0
236 236
237 237 if method == CLEAR_INVALIDATE:
238 238 # NOTE: The CacheRegion.invalidate() method’s default mode of
239 239 # operation is to set a timestamp local to this CacheRegion in this Python process only.
240 240 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
241 241 cache_region.invalidate(hard=True)
242 242
243 243 if method == CLEAR_DELETE:
244 244 num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
245 245 return num_affected_keys
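A hedged usage sketch of the two helpers above; the region name 'cache_repo' and the namespace are illustrative and assume the region was registered in region_meta.dogpile_cache_regions at configuration time:

    from . import CLEAR_DELETE  # same import clear_cache_namespace uses internally

    region = get_or_create_region('cache_repo', region_namespace='repo_1')

    @region.conditional_cache_on_arguments(condition=True)
    def expensive(namespace, arg):
        return arg * 2  # computed once per (namespace, arg), then cached

    expensive('repo_1', 21)
    # later, drop every key stored under the namespace:
    clear_cache_namespace('cache_repo', 'repo_1', method=CLEAR_DELETE)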
@@ -1,158 +1,158 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import typing
19 19 import base64
20 20 import logging
21 21
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 26 def safe_int(val, default=None) -> int:
27 27 """
28 28 Returns int(val). If val is not convertible to an int, the default
29 29 is returned instead.
30 30
31 31 :param val:
32 32 :param default:
33 33 """
34 34
35 35 try:
36 36 val = int(val)
37 37 except (ValueError, TypeError):
38 38 val = default
39 39
40 40 return val
41 41
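A few doctest-style illustrations of the fallback behaviour:

    safe_int('42')       # -> 42
    safe_int('4.2')      # -> None, since int('4.2') raises ValueError
    safe_int(None, 0)    # -> 0, the TypeError falls back to the default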
42 42
43 43 def base64_to_str(text) -> str:
44 44 return safe_str(base64.encodebytes(safe_bytes(text))).strip()
45 45
46 46
47 47 def get_default_encodings() -> list[str]:
48 48 return ['utf8']
49 49
50 50
51 51 def safe_str(str_, to_encoding=None) -> str:
52 52 """
53 53 safe str function. Does a few tricks to turn str_ into a string
54 54
55 55 :param str_: value to turn into str
56 56 :param to_encoding: decode bytes using this encoding (utf8 by default)
57 57 """
58 58 if isinstance(str_, str):
59 59 return str_
60 60
61 61 # if it's not bytes, cast to str directly
62 62 if not isinstance(str_, bytes):
63 63 return str(str_)
64 64
65 65 to_encoding = to_encoding or get_default_encodings()
66 66 if not isinstance(to_encoding, (list, tuple)):
67 67 to_encoding = [to_encoding]
68 68
69 69 for enc in to_encoding:
70 70 try:
71 71 return str(str_, enc)
72 72 except UnicodeDecodeError:
73 73 pass
74 74
75 75 return str(str_, to_encoding[0], 'replace')
76 76
77 77
78 78 def safe_bytes(str_, from_encoding=None) -> bytes:
79 79 """
80 80 safe bytes function. Does a few tricks to turn str_ into bytes:
81 81
82 82 :param str_: string to encode
83 83 :param from_encoding: encode using this encoding (utf8 by default)
84 84 """
85 85 if isinstance(str_, bytes):
86 86 return str_
87 87
88 88 if not isinstance(str_, str):
89 89 raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
90 90
91 91 from_encoding = from_encoding or get_default_encodings()
92 92 if not isinstance(from_encoding, (list, tuple)):
93 93 from_encoding = [from_encoding]
94 94
95 95 for enc in from_encoding:
96 96 try:
97 97 return str_.encode(enc)
98 98 except UnicodeEncodeError:  # str.encode() raises encode, not decode, errors
99 99 pass
100 100
101 101 return str_.encode(from_encoding[0], 'replace')
102 102
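Illustrative round-trips for the two helpers above under the default utf8 encoding list:

    safe_str(b'caf\xc3\xa9')   # -> 'café' (utf8 decode)
    safe_str(42)               # -> '42' (non-bytes fall back to str())
    safe_bytes('café')         # -> b'caf\xc3\xa9'
    safe_bytes(b'raw')         # -> b'raw' (bytes pass through unchanged)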
103 103
104 104 def ascii_bytes(str_, allow_bytes=False) -> bytes:
105 105 """
106 106 Simple conversion from str to bytes, with the assumption that str_ is pure ASCII.
107 107 Fails with UnicodeError on invalid input.
108 108 This should be used where encoding and "safe" ambiguity should be avoided.
109 109 Use it where strings have already been encoded in other ways but are still
110 110 unicode strings - for example hex, base64, json, urlencoded values, or
111 111 known identifiers.
112 112 """
113 113 if allow_bytes and isinstance(str_, bytes):
114 114 return str_
115 115
116 116 if not isinstance(str_, str):
117 117 raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
118 118 return str_.encode('ascii')
119 119
120 120
121 121 def ascii_str(str_) -> str:
122 122 """
123 123 Simple conversion from bytes to str, with the assumption that str_ is pure ASCII.
124 124 Fails with UnicodeError on invalid input.
125 125 This should be used where encoding and "safe" ambiguity should be avoided.
126 126 Use it where bytes are known to be ASCII and a unicode string is wanted
127 127 without caring about encoding - for example hex, base64, urlencoded
128 128 values, or known identifiers.
129 129 """
130 130
131 131 if not isinstance(str_, bytes):
132 132 raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
133 133 return str_.decode('ascii')
134 134
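For contrast with the 'safe' helpers, these strict variants raise instead of replacing:

    ascii_bytes('deadbeef')    # -> b'deadbeef'
    ascii_str(b'deadbeef')     # -> 'deadbeef'
    ascii_bytes('café')        # raises UnicodeEncodeError
    ascii_str(b'\xc3\xa9')     # raises UnicodeDecodeError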
135 135
136 136 def convert_to_str(data):
137 137 if isinstance(data, bytes):
138 138 return safe_str(data)
139 139 elif isinstance(data, tuple):
140 140 return tuple(convert_to_str(item) for item in data)
141 141 elif isinstance(data, list):
142 142 return list(convert_to_str(item) for item in data)
143 143 else:
144 144 return data
145 145
146 146
147 147 def splitnewlines(text: bytes):
148 148 """
149 149 like splitlines, but only split on newlines.
150 150 """
151 151
152 152 lines = [_l + b'\n' for _l in text.split(b'\n')]
153 153 if lines:
154 154 if lines[-1] == b'\n':
155 155 lines.pop()
156 156 else:
157 157 lines[-1] = lines[-1][:-1]
158 return lines
\ No newline at end of file
158 return lines
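Illustrative edge cases showing how splitnewlines differs from bytes.splitlines:

    splitnewlines(b'a\nb\n')   # -> [b'a\n', b'b\n']  terminators kept, no empty tail
    splitnewlines(b'a\nb')     # -> [b'a\n', b'b']    partial last line preserved
    splitnewlines(b'')         # -> []
    splitnewlines(b'a\r\nb')   # -> [b'a\r\n', b'b']  \r is not treated as a separator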
@@ -1,160 +1,160 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import tempfile
20 20
21 21 from svn import client
22 22 from svn import core
23 23 from svn import ra
24 24
25 25 from mercurial import error
26 26
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28
29 29 core.svn_config_ensure(None)
30 30 svn_config = core.svn_config_get_config(None)
31 31
32 32
33 33 class RaCallbacks(ra.Callbacks):
34 34 @staticmethod
35 35 def open_tmp_file(pool): # pragma: no cover
36 36 (fd, fn) = tempfile.mkstemp()
37 37 os.close(fd)
38 38 return fn
39 39
40 40 @staticmethod
41 41 def get_client_string(pool):
42 42 return b'RhodeCode-subversion-url-checker'
43 43
44 44
45 45 class SubversionException(Exception):
46 46 pass
47 47
48 48
49 49 class SubversionConnectionException(SubversionException):
50 50 """Exception raised when a generic error occurs when connecting to a repository."""
51 51
52 52
53 53 def normalize_url(url):
54 54 if not url:
55 55 return url
56 56 if url.startswith(b'svn+http://') or url.startswith(b'svn+https://'):
57 57 url = url[4:]
58 58 url = url.rstrip(b'/')
59 59 return url
60 60
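Illustrative inputs and outputs for normalize_url; the host is hypothetical:

    normalize_url(b'svn+https://svn.example.com/repo/')  # -> b'https://svn.example.com/repo'
    normalize_url(b'https://svn.example.com/repo')       # -> unchanged
    normalize_url(b'')                                   # -> b'' (falsy passthrough)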
61 61
62 62 def _create_auth_baton(pool):
63 63 """Create a Subversion authentication baton. """
64 64 # Give the client context baton a suite of authentication
64 64 # providers.
66 66 platform_specific = [
67 67 'svn_auth_get_gnome_keyring_simple_provider',
68 68 'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider',
69 69 'svn_auth_get_keychain_simple_provider',
70 70 'svn_auth_get_keychain_ssl_client_cert_pw_provider',
71 71 'svn_auth_get_kwallet_simple_provider',
72 72 'svn_auth_get_kwallet_ssl_client_cert_pw_provider',
73 73 'svn_auth_get_ssl_client_cert_file_provider',
74 74 'svn_auth_get_windows_simple_provider',
75 75 'svn_auth_get_windows_ssl_server_trust_provider',
76 76 ]
77 77
78 78 providers = []
79 79
80 80 for p in platform_specific:
81 81 if getattr(core, p, None) is not None:
82 82 try:
83 83 providers.append(getattr(core, p)())
84 84 except RuntimeError:
85 85 pass
86 86
87 87 providers += [
88 88 client.get_simple_provider(),
89 89 client.get_username_provider(),
90 90 client.get_ssl_client_cert_file_provider(),
91 91 client.get_ssl_client_cert_pw_file_provider(),
92 92 client.get_ssl_server_trust_file_provider(),
93 93 ]
94 94
95 95 return core.svn_auth_open(providers, pool)
96 96
97 97
98 98 class SubversionRepo:
99 99 """Wrapper for a Subversion repository.
100 100
101 101 It uses the SWIG Python bindings, see above for requirements.
102 102 """
103 103 def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''):
104 104
105 105 self.username = username
106 106 self.password = password
107 107 self.svn_url = core.svn_path_canonicalize(svn_url)
108 108
109 109 self.auth_baton_pool = core.Pool()
110 110 self.auth_baton = _create_auth_baton(self.auth_baton_pool)
111 111 # self.init_ra_and_client() assumes that a pool already exists
112 112 self.pool = core.Pool()
113 113
114 114 self.ra = self.init_ra_and_client()
115 115 self.uuid = ra.get_uuid(self.ra, self.pool)
116 116
117 117 def init_ra_and_client(self):
118 118 """Initializes the RA and client layers, because sometimes getting
119 119 unified diffs runs the remote server out of open files.
120 120 """
121 121
122 122 if self.username:
123 123 core.svn_auth_set_parameter(self.auth_baton,
124 124 core.SVN_AUTH_PARAM_DEFAULT_USERNAME,
125 125 self.username)
126 126 if self.password:
127 127 core.svn_auth_set_parameter(self.auth_baton,
128 128 core.SVN_AUTH_PARAM_DEFAULT_PASSWORD,
129 129 self.password)
130 130
131 131 callbacks = RaCallbacks()
132 132 callbacks.auth_baton = self.auth_baton
133 133
134 134 try:
135 135 return ra.open2(self.svn_url, callbacks, svn_config, self.pool)
136 136 except SubversionException as e:
137 137 # e.child contains a detailed error message
138 138 msglist = []
139 139 svn_exc = e
140 140 while svn_exc:
141 141 if svn_exc.args[0]:
142 142 msglist.append(svn_exc.args[0])
143 143 svn_exc = svn_exc.child
144 144 msg = '\n'.join(msglist)
145 145 raise SubversionConnectionException(msg)
146 146
147 147
148 148 class svnremoterepo:
149 149 """ the dumb wrapper for actual Subversion repositories """
150 150
151 151 def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''):
152 152 self.username = username or b''
153 153 self.password = password or b''
154 154 self.path = normalize_url(svn_url)
155 155
156 156 def svn(self):
157 157 try:
158 158 return SubversionRepo(self.path, self.username, self.password)
159 159 except SubversionConnectionException as e:
160 160 raise error.Abort(safe_bytes(e))
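A hedged usage sketch of the wrapper above; the URL and credentials are purely illustrative:

    remote = svnremoterepo(
        username=b'reader',
        password=b'secret',
        svn_url=b'svn+https://svn.example.com/repo',  # 'svn+' prefix gets stripped
    )
    repo = remote.svn()   # opens the RA session; raises error.Abort on failure
    print(repo.uuid)      # repository UUID obtained via ra.get_uuid()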
1 NO CONTENT: file renamed from vcsserver/type_utils.py to vcsserver/lib/type_utils.py
@@ -1,417 +1,417 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 24 import dulwich.protocol
25 25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 from vcsserver.lib.rc_json import json
28 from vcsserver.lib.ext_json import json
29 29 from vcsserver import hooks, subprocessio
30 from vcsserver.str_utils import ascii_bytes
30 from vcsserver.lib.str_utils import ascii_bytes
31 31
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class FileWrapper:
37 37 """File wrapper that ensures how much data is read from it."""
38 38
39 39 def __init__(self, fd, content_length):
40 40 self.fd = fd
41 41 self.content_length = content_length
42 42 self.remain = content_length
43 43
44 44 def read(self, size):
45 45 if size <= self.remain:
46 46 try:
47 47 data = self.fd.read(size)
48 48 except socket.error:
49 49 raise IOError(self)
50 50 self.remain -= size
51 51 elif self.remain:
52 52 data = self.fd.read(self.remain)
53 53 self.remain = 0
54 54 else:
55 55 data = None
56 56 return data
57 57
58 58 def __repr__(self):
59 59 return '<FileWrapper {} len: {}, read: {}>'.format(
60 60 self.fd, self.content_length, self.content_length - self.remain
61 61 )
62 62
63 63
64 64 class GitRepository:
65 65 """WSGI app for handling Git smart protocol endpoints."""
66 66
67 67 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
68 68 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
69 69 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 73 b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
74 74 b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 76 FLUSH_PACKET = b"0000"
77 77
78 78 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
79 79
80 80 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
81 81 files = frozenset(f.lower() for f in os.listdir(content_path))
82 82 valid_dir_signature = self.git_folder_signature.issubset(files)
83 83
84 84 if not valid_dir_signature:
85 85 raise OSError(f'{content_path} missing git signature')
86 86
87 87 self.content_path = content_path
88 88 self.repo_name = repo_name
89 89 self.extras = extras
90 90 self.git_path = git_path
91 91 self.update_server_info = update_server_info
92 92
93 93 def _get_fixedpath(self, path):
94 94 """
95 95 Small fix for repo_path
96 96
97 97 :param path:
98 98 """
99 99 path = path.split(self.repo_name, 1)[-1]
100 100 if path.startswith('.git'):
101 101 # for bare repos we still get the .git prefix inside, we skip it
102 102 # here, and remove from the service command
103 103 path = path[4:]
104 104
105 105 return path.strip('/')
106 106
107 107 def inforefs(self, request, unused_environ):
108 108 """
109 109 WSGI Response producer for HTTP GET Git Smart
110 110 HTTP /info/refs request.
111 111 """
112 112
113 113 git_command = request.GET.get('service')
114 114 if git_command not in self.commands:
115 115 log.debug('command %s not allowed', git_command)
116 116 return exc.HTTPForbidden()
117 117
118 118 # please, resist the urge to add '\n' to git capture and increment
119 119 # line count by 1.
120 120 # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
121 121 # a part of protocol.
122 122 # The code in the Git client not only does NOT need '\n', but actually
123 123 # blows up if you sprinkle "flush" (0000) as "0001\n".
124 124 # It reads binary, per number of bytes specified.
125 125 # if you do add '\n' as part of data, count it.
126 126 server_advert = f'# service={git_command}\n'
127 127 packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
128 128 try:
129 129 gitenv = dict(os.environ)
130 130 # forget all configs
131 131 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
132 132 command = [self.git_path, git_command[4:], '--stateless-rpc',
133 133 '--advertise-refs', self.content_path]
134 134 out = subprocessio.SubprocessIOChunker(
135 135 command,
136 136 env=gitenv,
137 137 starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
138 138 shell=False
139 139 )
140 140 except OSError:
141 141 log.exception('Error processing command')
142 142 raise exc.HTTPExpectationFailed()
143 143
144 144 resp = Response()
145 145 resp.content_type = f'application/x-{git_command}-advertisement'
146 146 resp.charset = None
147 147 resp.app_iter = out
148 148
149 149 return resp
150 150
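To make the length prefix concrete, a small worked example of the pkt-line framing computed in inforefs for git-upload-pack:

    server_advert = '# service=git-upload-pack\n'   # 26 bytes
    packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
    # 26 + 4 = 30 -> '001e' (the 4 hex length chars count themselves)
    first_pkt = packet_len + server_advert          # '001e# service=git-upload-pack\n'
    # the FLUSH_PACKET b'0000' then separates the advert from the refs that follow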
151 151 def _get_want_capabilities(self, request):
152 152 """Read the capabilities found in the first want line of the request."""
153 153 pos = request.body_file_seekable.tell()
154 154 first_line = request.body_file_seekable.readline()
155 155 request.body_file_seekable.seek(pos)
156 156
157 157 return frozenset(
158 158 dulwich.protocol.extract_want_line_capabilities(first_line)[1])
159 159
160 160 def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
161 161 """
162 162 Construct a response with an empty PACK file.
163 163
164 164 We use an empty PACK file, as that would trigger the failure of the pull
165 165 or clone command.
166 166
167 167 We also print in the error output a message explaining why the command
168 168 was aborted.
169 169
170 170 If, additionally, the client accepts messages, we send it the output
171 171 of the pre-pull hook.
172 172
173 173 Note that for clients not supporting side-band we just send them the
174 174 empty PACK file.
175 175 """
176 176
177 177 if self.SIDE_BAND_CAPS.intersection(capabilities):
178 178 response = []
179 179 proto = dulwich.protocol.Protocol(None, response.append)
180 180 proto.write_pkt_line(dulwich.protocol.NAK_LINE)
181 181
182 182 self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
183 183 # N.B.(skreft): Do not change the sideband channel to 3, as that
184 184 # produces a fatal error in the client:
185 185 # fatal: error in sideband demultiplexer
186 186 proto.write_sideband(
187 187 dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
188 188 ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
189 189 proto.write_sideband(
190 190 dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
191 191 ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
192 192
193 193 # writes b"0000" as default
194 194 proto.write_pkt_line(None)
195 195
196 196 return response
197 197 else:
198 198 return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
199 199
200 200 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
201 201 """
202 202 Given a list response we inject the post-pull messages.
203 203
204 204 We only inject the messages if the client supports sideband, and the
205 205 response has the format:
206 206 0008NAK\n...0000
207 207
208 208 Note that we do not check the no-progress capability: git sends it by
209 209 default, which would effectively block all messages.
210 210 """
211 211
212 212 if not self.SIDE_BAND_CAPS.intersection(capabilities):
213 213 return response
214 214
215 215 if not start_message and not end_message:
216 216 return response
217 217
218 218 try:
219 219 iter(response)
220 220 # iterator probably will work, we continue
221 221 except TypeError:
222 222 raise TypeError(f'response must be an iterator: got {type(response)}')
223 223 if isinstance(response, (list, tuple)):
224 224 raise TypeError(f'response must be an iterator: got {type(response)}')
225 225
226 226 def injected_response():
227 227
228 228 do_loop = 1
229 229 header_injected = 0
230 230 next_item = None
231 231 has_item = False
232 232 item = b''
233 233
234 234 while do_loop:
235 235
236 236 try:
237 237 next_item = next(response)
238 238 except StopIteration:
239 239 do_loop = 0
240 240
241 241 if has_item:
242 242 # last item ! alter it now
243 243 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
244 244 new_response = [item[:-4]]
245 245 new_response.extend(self._get_messages(end_message, capabilities))
246 246 new_response.append(self.FLUSH_PACKET)
247 247 item = b''.join(new_response)
248 248
249 249 yield item
250 250
251 251 has_item = True
252 252 item = next_item
253 253
254 254 # alter item if it's the initial chunk
255 255 if not header_injected and item.startswith(b'0008NAK\n'):
256 256 new_response = [b'0008NAK\n']
257 257 new_response.extend(self._get_messages(start_message, capabilities))
258 258 new_response.append(item[8:])
259 259 item = b''.join(new_response)
260 260 header_injected = 1
261 261
262 262 return injected_response()
263 263
264 264 def _write_sideband_to_proto(self, proto, data, capabilities):
265 265 """
266 266 Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
267 267
268 268 We do not use dulwich's write_sideband directly as it only supports
269 269 side-band-64k.
270 270 """
271 271 if not data:
272 272 return
273 273
274 274 # N.B.(skreft): The values below are explained in the pack protocol
275 275 # documentation, section Packfile Data.
276 276 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
277 277 if CAPABILITY_SIDE_BAND_64K in capabilities:
278 278 chunk_size = 65515
279 279 elif CAPABILITY_SIDE_BAND in capabilities:
280 280 chunk_size = 995
281 281 else:
282 282 return
283 283
284 284 chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
285 285
286 286 for chunk in chunker:
287 287 proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
288 288
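Why 65515 and 995: a pkt-line carries at most 65520 (side-band-64k) or 1000 (side-band) bytes including the 4 length chars and the 1 band byte, which leaves exactly those payload sizes. An illustrative chunking of a 100 kB message:

    data = b'x' * 100_000
    chunk_size = 65515  # side-band-64k payload limit
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]
    # -> two chunks of 65515 and 34485 bytes, each written to band 2 (progress)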
289 289 def _get_messages(self, data, capabilities):
290 290 """Return a list with packets for sending data in sideband number 2."""
291 291 response = []
292 292 proto = dulwich.protocol.Protocol(None, response.append)
293 293
294 294 self._write_sideband_to_proto(proto, data, capabilities)
295 295
296 296 return response
297 297
298 298 def backend(self, request, environ):
299 299 """
300 300 WSGI Response producer for HTTP POST Git Smart HTTP requests.
301 301 Reads commands and data from HTTP POST's body.
302 302 Returns an iterator object with the contents of the git
303 303 command's stdout response.
304 304 """
305 305 # TODO(skreft): think how we could detect an HTTPLockedException, as
306 306 # we probably want to have the same mechanism used by mercurial and
307 307 # simplevcs.
308 308 # For that we would need to parse the output of the command looking for
309 309 # some signs of the HTTPLockedError, parse the data and reraise it in
310 310 # pygrack. However, that would interfere with the streaming.
311 311 #
312 312 # Now the output of a blocked push is:
313 313 # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
314 314 # POST git-receive-pack (1047 bytes)
315 315 # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
316 316 # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
317 317 # ! [remote rejected] master -> master (pre-receive hook declined)
318 318 # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
319 319
320 320 git_command = self._get_fixedpath(request.path_info)
321 321 if git_command not in self.commands:
322 322 log.debug('command %s not allowed', git_command)
323 323 return exc.HTTPForbidden()
324 324
325 325 capabilities = None
326 326 if git_command == 'git-upload-pack':
327 327 capabilities = self._get_want_capabilities(request)
328 328
329 329 if 'CONTENT_LENGTH' in environ:
330 330 inputstream = FileWrapper(request.body_file_seekable,
331 331 request.content_length)
332 332 else:
333 333 inputstream = request.body_file_seekable
334 334
335 335 resp = Response()
336 336 resp.content_type = f'application/x-{git_command}-result'
337 337 resp.charset = None
338 338
339 339 pre_pull_messages = ''
340 340 # Upload-pack == clone
341 341 if git_command == 'git-upload-pack':
342 342 hook_response = hooks.git_pre_pull(self.extras)
343 343 if hook_response.status != 0:
344 344 pre_pull_messages = hook_response.output
345 345 resp.app_iter = self._build_failed_pre_pull_response(
346 346 capabilities, pre_pull_messages)
347 347 return resp
348 348
349 349 gitenv = dict(os.environ)
350 350 # forget all configs
351 351 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
352 352 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
353 353 cmd = [self.git_path, git_command[4:], '--stateless-rpc',
354 354 self.content_path]
355 355 log.debug('handling cmd %s', cmd)
356 356
357 357 out = subprocessio.SubprocessIOChunker(
358 358 cmd,
359 359 input_stream=inputstream,
360 360 env=gitenv,
361 361 cwd=self.content_path,
362 362 shell=False,
363 363 fail_on_stderr=False,
364 364 fail_on_return_code=False
365 365 )
366 366
367 367 if self.update_server_info and git_command == 'git-receive-pack':
368 368 # We need to fully consume the iterator here, as the
369 369 # update-server-info command needs to be run after the push.
370 370 out = list(out)
371 371
372 372 # Updating refs manually after each push.
373 373 # This is required as some clients are exposing Git repos internally
374 374 # with the dumb protocol.
375 375 cmd = [self.git_path, 'update-server-info']
376 376 log.debug('handling cmd %s', cmd)
377 377 output = subprocessio.SubprocessIOChunker(
378 378 cmd,
379 379 input_stream=inputstream,
380 380 env=gitenv,
381 381 cwd=self.content_path,
382 382 shell=False,
383 383 fail_on_stderr=False,
384 384 fail_on_return_code=False
385 385 )
386 386 # Consume all the output so the subprocess finishes
387 387 for _ in output:
388 388 pass
389 389
390 390 # Upload-pack == clone
391 391 if git_command == 'git-upload-pack':
392 392 hook_response = hooks.git_post_pull(self.extras)
393 393 post_pull_messages = hook_response.output
394 394 resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
395 395 else:
396 396 resp.app_iter = out
397 397
398 398 return resp
399 399
400 400 def __call__(self, environ, start_response):
401 401 request = Request(environ)
402 402 _path = self._get_fixedpath(request.path_info)
403 403 if _path.startswith('info/refs'):
404 404 app = self.inforefs
405 405 else:
406 406 app = self.backend
407 407
408 408 try:
409 409 resp = app(request, environ)
410 410 except exc.HTTPException as error:
411 411 log.exception('HTTP Error')
412 412 resp = error
413 413 except Exception:
414 414 log.exception('Unknown error')
415 415 resp = exc.HTTPInternalServerError()
416 416
417 417 return resp(environ, start_response)
@@ -1,1519 +1,1526 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40
41 41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
43 from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 53 PEELED_REF_MARKER = b'^{}'
54 54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
59 59 def reraise_safe_exceptions(func):
60 60 """Converts Dulwich exceptions to something neutral."""
61 61
62 62 @wraps(func)
63 63 def wrapper(*args, **kwargs):
64 64 try:
65 65 return func(*args, **kwargs)
66 66 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
67 67 exc = exceptions.LookupException(org_exc=e)
68 68 raise exc(safe_str(e))
69 69 except (HangupException, UnexpectedCommandError) as e:
70 70 exc = exceptions.VcsException(org_exc=e)
71 71 raise exc(safe_str(e))
72 72 except Exception:
73 73 # NOTE(marcink): because of how dulwich handles some exceptions
74 74 # (KeyError on empty repos), we cannot track this and catch all
75 75 # exceptions; they may be exceptions from other handlers
76 76 #if not hasattr(e, '_vcs_kind'):
77 77 #log.exception("Unhandled exception in git remote call")
78 78 #raise_from_original(exceptions.UnhandledException)
79 79 raise
80 80 return wrapper
81 81
82 82
83 83 class Repo(DulwichRepo):
84 84 """
85 85 A wrapper for dulwich Repo class.
86 86
87 87 Since dulwich sometimes keeps .idx file descriptors open, this can lead to
88 88 a "Too many open files" error. We need to close all opened file descriptors
89 89 once the repo object is destroyed.
90 90 """
91 91 def __del__(self):
92 92 if hasattr(self, 'object_store'):
93 93 self.close()
94 94
95 95
96 96 class Repository(LibGit2Repo):
97 97
98 98 def __enter__(self):
99 99 return self
100 100
101 101 def __exit__(self, exc_type, exc_val, exc_tb):
102 102 self.free()
103 103
104 104
105 105 class GitFactory(RepoFactory):
106 106 repo_type = 'git'
107 107
108 108 def _create_repo(self, wire, create, use_libgit2=False):
109 109 if use_libgit2:
110 110 repo = Repository(safe_bytes(wire['path']))
111 111 else:
112 112 # dulwich mode
113 113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 114 repo = Repo(repo_path)
115 115
116 116 log.debug('repository created: got GIT object: %s', repo)
117 117 return repo
118 118
119 119 def repo(self, wire, create=False, use_libgit2=False):
120 120 """
121 121 Get a repository instance for the given path.
122 122 """
123 123 return self._create_repo(wire, create, use_libgit2)
124 124
125 125 def repo_libgit2(self, wire):
126 126 return self.repo(wire, use_libgit2=True)
127 127
128 128
129 129 def create_signature_from_string(author_str, **kwargs):
130 130 """
131 131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132 132
133 133 :param author_str: String of the format 'Name <email>'
134 134 :return: pygit2.Signature object
135 135 """
136 136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 137 if match is None:
138 138 raise ValueError(f"Invalid format: {author_str}")
139 139
140 140 name, email = match.groups()
141 141 return pygit2.Signature(name, email, **kwargs)
142 142
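Hypothetical usage of the parser above; the author string and timestamp are illustrative:

    sig = create_signature_from_string(
        'Jane Doe <jane@example.com>', time=1700000000, offset=120)
    assert (sig.name, sig.email) == ('Jane Doe', 'jane@example.com')
    create_signature_from_string('no-email-here')  # raises ValueError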
143 143
144 144 def get_obfuscated_url(url_obj):
145 145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 146 url_obj.query = obfuscate_qs(url_obj.query)
147 147 obfuscated_uri = str(url_obj)
148 148 return obfuscated_uri
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
153 153 def __init__(self, factory):
154 154 self._factory = factory
155 155 self._bulk_methods = {
156 156 "date": self.date,
157 157 "author": self.author,
158 158 "branch": self.branch,
159 159 "message": self.message,
160 160 "parents": self.parents,
161 161 "_commit": self.revision,
162 162 }
163 163 self._bulk_file_methods = {
164 164 "size": self.get_node_size,
165 165 "data": self.get_node_data,
166 166 "flags": self.get_node_flags,
167 167 "is_binary": self.get_node_is_binary,
168 168 "md5": self.md5_hash
169 169 }
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 config_attrs = {
181 181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 183 }
184 184 for key, param in config_attrs.items():
185 185 if value := config.get(key):
186 186 params.extend(['-c', param.format(value)])
187 187 return params
188 188
189 189 @reraise_safe_exceptions
190 190 def discover_git_version(self):
191 191 stdout, _ = self.run_git_command(
192 192 {}, ['--version'], _bare=True, _safe=True)
193 193 prefix = b'git version'
194 194 if stdout.startswith(prefix):
195 195 stdout = stdout[len(prefix):]
196 196 return safe_str(stdout.strip())
197 197
198 198 @reraise_safe_exceptions
199 199 def is_empty(self, wire):
200 200 repo_init = self._factory.repo_libgit2(wire)
201 201 with repo_init as repo:
202 202 try:
203 203 has_head = repo.head.name
204 204 if has_head:
205 205 return False
206 206
207 207 # NOTE(marcink): check again using more expensive method
208 208 return repo.is_empty
209 209 except Exception:
210 210 pass
211 211
212 212 return True
213 213
214 214 @reraise_safe_exceptions
215 215 def assert_correct_path(self, wire):
216 216 cache_on, context_uid, repo_id = self._cache_on(wire)
217 217 region = self._region(wire)
218 218
219 219 @region.conditional_cache_on_arguments(condition=cache_on)
220 220 def _assert_correct_path(_context_uid, _repo_id, fast_check):
221 221 if fast_check:
222 222 path = safe_str(wire['path'])
223 223 if pygit2.discover_repository(path):
224 224 return True
225 225 return False
226 226 else:
227 227 try:
228 228 repo_init = self._factory.repo_libgit2(wire)
229 229 with repo_init:
230 230 pass
231 231 except pygit2.GitError:
232 232 path = wire.get('path')
233 233 tb = traceback.format_exc()
234 234 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
235 235 return False
236 236 return True
237 237
238 238 return _assert_correct_path(context_uid, repo_id, True)
239 239
240 240 @reraise_safe_exceptions
241 241 def bare(self, wire):
242 242 repo_init = self._factory.repo_libgit2(wire)
243 243 with repo_init as repo:
244 244 return repo.is_bare
245 245
246 246 @reraise_safe_exceptions
247 247 def get_node_data(self, wire, commit_id, path):
248 248 repo_init = self._factory.repo_libgit2(wire)
249 249 with repo_init as repo:
250 250 commit = repo[commit_id]
251 251 blob_obj = commit.tree[path]
252 252
253 253 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
254 254 raise exceptions.LookupException()(
255 255 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
256 256
257 257 return BytesEnvelope(blob_obj.data)
258 258
259 259 @reraise_safe_exceptions
260 260 def get_node_size(self, wire, commit_id, path):
261 261 repo_init = self._factory.repo_libgit2(wire)
262 262 with repo_init as repo:
263 263 commit = repo[commit_id]
264 264 blob_obj = commit.tree[path]
265 265
266 266 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
267 267 raise exceptions.LookupException()(
268 268 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
269 269
270 270 return blob_obj.size
271 271
272 272 @reraise_safe_exceptions
273 273 def get_node_flags(self, wire, commit_id, path):
274 274 repo_init = self._factory.repo_libgit2(wire)
275 275 with repo_init as repo:
276 276 commit = repo[commit_id]
277 277 blob_obj = commit.tree[path]
278 278
279 279 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
280 280 raise exceptions.LookupException()(
281 281 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
282 282
283 283 return blob_obj.filemode
284 284
285 285 @reraise_safe_exceptions
286 286 def get_node_is_binary(self, wire, commit_id, path):
287 287 repo_init = self._factory.repo_libgit2(wire)
288 288 with repo_init as repo:
289 289 commit = repo[commit_id]
290 290 blob_obj = commit.tree[path]
291 291
292 292 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
293 293 raise exceptions.LookupException()(
294 294 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
295 295
296 296 return blob_obj.is_binary
297 297
298 298 @reraise_safe_exceptions
299 299 def blob_as_pretty_string(self, wire, sha):
300 300 repo_init = self._factory.repo_libgit2(wire)
301 301 with repo_init as repo:
302 302 blob_obj = repo[sha]
303 303 return BytesEnvelope(blob_obj.data)
304 304
305 305 @reraise_safe_exceptions
306 306 def blob_raw_length(self, wire, sha):
307 307 cache_on, context_uid, repo_id = self._cache_on(wire)
308 308 region = self._region(wire)
309 309
310 310 @region.conditional_cache_on_arguments(condition=cache_on)
311 311 def _blob_raw_length(_repo_id, _sha):
312 312
313 313 repo_init = self._factory.repo_libgit2(wire)
314 314 with repo_init as repo:
315 315 blob = repo[sha]
316 316 return blob.size
317 317
318 318 return _blob_raw_length(repo_id, sha)
319 319
320 320 def _parse_lfs_pointer(self, raw_content):
321 321 spec_string = b'version https://git-lfs.github.com/spec'
322 322 if raw_content and raw_content.startswith(spec_string):
323 323
324 324 pattern = re.compile(rb"""
325 325 (?:\n)?
326 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 328 ^size[ ](?P<oid_size>[0-9]+)\n
329 329 (?:\n)?
330 330 """, re.VERBOSE | re.MULTILINE)
331 331 match = pattern.match(raw_content)
332 332 if match:
333 333 return match.groupdict()
334 334
335 335 return {}
336 336
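An illustrative well-formed pointer accepted by the pattern above; the oid is a dummy 64-hex-char value:

    pointer = (
        b'version https://git-lfs.github.com/spec/v1\n'
        b'oid sha256:' + b'a' * 64 + b'\n'
        b'size 12345\n'
    )
    # _parse_lfs_pointer(pointer)
    # -> {'spec_ver': b'v1', 'oid_hash': b'a' * 64, 'oid_size': b'12345'}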
337 337 @reraise_safe_exceptions
338 338 def is_large_file(self, wire, commit_id):
339 339 cache_on, context_uid, repo_id = self._cache_on(wire)
340 340 region = self._region(wire)
341 341
342 342 @region.conditional_cache_on_arguments(condition=cache_on)
343 343 def _is_large_file(_repo_id, _sha):
344 344 repo_init = self._factory.repo_libgit2(wire)
345 345 with repo_init as repo:
346 346 blob = repo[commit_id]
347 347 if blob.is_binary:
348 348 return {}
349 349
350 350 return self._parse_lfs_pointer(blob.data)
351 351
352 352 return _is_large_file(repo_id, commit_id)
353 353
354 354 @reraise_safe_exceptions
355 355 def is_binary(self, wire, tree_id):
356 356 cache_on, context_uid, repo_id = self._cache_on(wire)
357 357 region = self._region(wire)
358 358
359 359 @region.conditional_cache_on_arguments(condition=cache_on)
360 360 def _is_binary(_repo_id, _tree_id):
361 361 repo_init = self._factory.repo_libgit2(wire)
362 362 with repo_init as repo:
363 363 blob_obj = repo[tree_id]
364 364 return blob_obj.is_binary
365 365
366 366 return _is_binary(repo_id, tree_id)
367 367
368 368 @reraise_safe_exceptions
369 369 def md5_hash(self, wire, commit_id, path):
370 370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 371 region = self._region(wire)
372 372
373 373 @region.conditional_cache_on_arguments(condition=cache_on)
374 374 def _md5_hash(_repo_id, _commit_id, _path):
375 375 repo_init = self._factory.repo_libgit2(wire)
376 376 with repo_init as repo:
377 377 commit = repo[_commit_id]
378 378 blob_obj = commit.tree[_path]
379 379
380 380 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
381 381 raise exceptions.LookupException()(
382 382 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
383 383
384 384 return ''
385 385
386 386 return _md5_hash(repo_id, commit_id, path)
387 387
388 388 @reraise_safe_exceptions
389 389 def in_largefiles_store(self, wire, oid):
390 390 conf = self._wire_to_config(wire)
391 391 repo_init = self._factory.repo_libgit2(wire)
392 392 with repo_init as repo:
393 393 repo_name = repo.path
394 394
395 395 store_location = conf.get('vcs_git_lfs_store_location')
396 396 if store_location:
397 397
398 398 store = LFSOidStore(
399 399 oid=oid, repo=repo_name, store_location=store_location)
400 400 return store.has_oid()
401 401
402 402 return False
403 403
404 404 @reraise_safe_exceptions
405 405 def store_path(self, wire, oid):
406 406 conf = self._wire_to_config(wire)
407 407 repo_init = self._factory.repo_libgit2(wire)
408 408 with repo_init as repo:
409 409 repo_name = repo.path
410 410
411 411 store_location = conf.get('vcs_git_lfs_store_location')
412 412 if store_location:
413 413 store = LFSOidStore(
414 414 oid=oid, repo=repo_name, store_location=store_location)
415 415 return store.oid_path
416 416 raise ValueError(f'Unable to fetch oid with path {oid}')
417 417
418 418 @reraise_safe_exceptions
419 419 def bulk_request(self, wire, rev, pre_load):
420 420 cache_on, context_uid, repo_id = self._cache_on(wire)
421 421 region = self._region(wire)
422 422
423 423 @region.conditional_cache_on_arguments(condition=cache_on)
424 424 def _bulk_request(_repo_id, _rev, _pre_load):
425 425 result = {}
426 426 for attr in pre_load:
427 427 try:
428 428 method = self._bulk_methods[attr]
429 429 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
430 430 args = [wire, rev]
431 431 result[attr] = method(*args)
432 432 except KeyError as e:
433 433 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
434 434 return result
435 435
436 436 return _bulk_request(repo_id, rev, sorted(pre_load))
437 437
438 438 @reraise_safe_exceptions
439 439 def bulk_file_request(self, wire, commit_id, path, pre_load):
440 440 cache_on, context_uid, repo_id = self._cache_on(wire)
441 441 region = self._region(wire)
442 442
443 443 @region.conditional_cache_on_arguments(condition=cache_on)
444 444 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
445 445 result = {}
446 446 for attr in pre_load:
447 447 try:
448 448 method = self._bulk_file_methods[attr]
449 449 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
450 450 result[attr] = method(wire, _commit_id, _path)
451 451 except KeyError as e:
452 452 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
453 453 return result
454 454
455 455 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456 456
457 457 def _build_opener(self, url: str):
458 458 handlers = []
459 459 url_obj = url_parser(safe_bytes(url))
460 460 authinfo = url_obj.authinfo()[1]
461 461
462 462 if authinfo:
463 463 # create a password manager
464 464 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
465 465 passmgr.add_password(*convert_to_str(authinfo))
466 466
467 467 handlers.extend((httpbasicauthhandler(passmgr),
468 468 httpdigestauthhandler(passmgr)))
469 469
470 470 return urllib.request.build_opener(*handlers)
471 471
472 472 @reraise_safe_exceptions
473 473 def check_url(self, url, config):
474 474 url_obj = url_parser(safe_bytes(url))
475 475
476 476 test_uri = safe_str(url_obj.authinfo()[0])
477 477 obfuscated_uri = get_obfuscated_url(url_obj)
478 478
479 479 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
480 480
481 481 if not test_uri.endswith('info/refs'):
482 482 test_uri = test_uri.rstrip('/') + '/info/refs'
483 483
484 484 o = self._build_opener(url=url)
485 485 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
486 486
487 487 q = {"service": 'git-upload-pack'}
488 488 qs = f'?{urllib.parse.urlencode(q)}'
489 489 cu = f"{test_uri}{qs}"
490 490
491 491 try:
492 492 req = urllib.request.Request(cu, None, {})
493 493 log.debug("Trying to open URL %s", obfuscated_uri)
494 494 resp = o.open(req)
495 495 if resp.code != 200:
496 496 raise exceptions.URLError()('Return Code is not 200')
497 497 except Exception as e:
498 498 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
499 499 # means it cannot be cloned
500 500 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
501 501
502 502 # now detect if it's proper git repo
503 503 gitdata: bytes = resp.read()
504 504
505 505 if b'service=git-upload-pack' in gitdata:
506 506 pass
507 507 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
508 508 # old style git can return some other format!
509 509 pass
510 510 else:
511 511 e = None
512 512 raise exceptions.URLError(e)(
513 513 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
514 514
515 515 return True
516 516
517 517 @reraise_safe_exceptions
518 518 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
519 519 # TODO(marcink): deprecate this method. Last I checked we don't use it anymore
520 520 remote_refs = self.pull(wire, url, apply_refs=False)
521 521 repo = self._factory.repo(wire)
522 522 if isinstance(valid_refs, list):
523 523 valid_refs = tuple(valid_refs)
524 524
525 525 for k in remote_refs:
526 526 # only parse heads/tags and skip so called deferred tags
527 527 if k.startswith(valid_refs) and not k.endswith(deferred):
528 528 repo[k] = remote_refs[k]
529 529
530 530 if update_after_clone:
531 531 # we want to checkout HEAD
532 532 repo["HEAD"] = remote_refs["HEAD"]
533 533 index.build_index_from_tree(repo.path, repo.index_path(),
534 534 repo.object_store, repo["HEAD"].tree)
535 535
536 536 @reraise_safe_exceptions
537 537 def branch(self, wire, commit_id):
538 538 cache_on, context_uid, repo_id = self._cache_on(wire)
539 539 region = self._region(wire)
540 540
541 541 @region.conditional_cache_on_arguments(condition=cache_on)
542 542 def _branch(_context_uid, _repo_id, _commit_id):
543 543 regex = re.compile('^refs/heads')
544 544
545 545 def filter_with(ref):
546 546 return regex.match(ref[0]) and ref[1] == _commit_id
547 547
548 548 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
549 549 return [x[0].split('refs/heads/')[-1] for x in branches]
550 550
551 551 return _branch(context_uid, repo_id, commit_id)
552 552
553 553 @reraise_safe_exceptions
554 def delete_branch(self, wire, branch_name):
555 repo_init = self._factory.repo_libgit2(wire)
556 with repo_init as repo:
557 if branch := repo.lookup_branch(branch_name):
558 branch.delete()
559
560 @reraise_safe_exceptions
554 561 def commit_branches(self, wire, commit_id):
555 562 cache_on, context_uid, repo_id = self._cache_on(wire)
556 563 region = self._region(wire)
557 564
558 565 @region.conditional_cache_on_arguments(condition=cache_on)
559 566 def _commit_branches(_context_uid, _repo_id, _commit_id):
560 567 repo_init = self._factory.repo_libgit2(wire)
561 568 with repo_init as repo:
562 569 branches = [x for x in repo.branches.with_commit(_commit_id)]
563 570 return branches
564 571
565 572 return _commit_branches(context_uid, repo_id, commit_id)
566 573
567 574 @reraise_safe_exceptions
568 575 def add_object(self, wire, content):
569 576 repo_init = self._factory.repo_libgit2(wire)
570 577 with repo_init as repo:
571 578 blob = objects.Blob()
572 579 blob.set_raw_string(content)
573 580 repo.object_store.add_object(blob)
574 581 return blob.id
575 582
576 583 @reraise_safe_exceptions
577 584 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
578 585 date_args: list[int, int] = None,
579 586 parents: list | None = None):
580 587
581 588 repo_init = self._factory.repo_libgit2(wire)
582 589 with repo_init as repo:
583 590
584 591 if date_args:
585 592 current_time, offset = date_args
586 593
587 594 kw = {
588 595 'time': current_time,
589 596 'offset': offset
590 597 }
591 598 author = create_signature_from_string(author, **kw)
592 599 committer = create_signature_from_string(committer, **kw)
593 600
594 601 tree = new_tree_id
595 602 if isinstance(tree, (bytes, str)):
596 603 # validate this tree is in the repo...
597 604 tree = repo[safe_str(tree)].id
598 605
599 606 if parents:
600 607 # run via sha's and validate them in repo
601 608 parents = [repo[c].id for c in parents]
602 609 else:
603 610 parents = []
604 611 # ensure we COMMIT on top of given branch head
605 612 # check if this repo has ANY branches; otherwise it's a new-branch case we need to handle
606 613 if branch in repo.branches.local:
607 614 parents += [repo.branches[branch].target]
608 615 elif [x for x in repo.branches.local]:
609 616 parents += [repo.head.target]
610 617 #else:
611 618 # in case we want to commit on new branch we create it on top of HEAD
612 619 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
613 620
614 621 # Create a new commit
615 622 commit_oid = repo.create_commit(
616 623 f'refs/heads/{branch}', # the name of the reference to update
617 624 author, # the author of the commit
618 625 committer, # the committer of the commit
619 626 message, # the commit message
620 627 tree, # the tree produced by the index
621 628 parents # list of parents for the new commit, usually just one,
622 629 )
623 630
624 631 new_commit_id = safe_str(commit_oid)
625 632
626 633 return new_commit_id
627 634
628 635 @reraise_safe_exceptions
629 636 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
630 637
631 638 def mode2pygit(mode):
632 639 """
633 640 git regular files only support two filemodes, 644 and 755 (symlinks map to GIT_FILEMODE_LINK)
634 641
635 642 0o100755 -> 33261
636 643 0o100644 -> 33188
637 644 """
638 645 return {
639 646 0o100644: pygit2.GIT_FILEMODE_BLOB,
640 647 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
641 648 0o120000: pygit2.GIT_FILEMODE_LINK
642 649 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
643 650
644 651 repo_init = self._factory.repo_libgit2(wire)
645 652 with repo_init as repo:
646 653 repo_index = repo.index
647 654
648 655 commit_parents = None
649 656 if commit_tree and commit_data['parents']:
650 657 commit_parents = commit_data['parents']
651 658 parent_commit = repo[commit_parents[0]]
652 659 repo_index.read_tree(parent_commit.tree)
653 660
654 661 for pathspec in updated:
655 662 blob_id = repo.create_blob(pathspec['content'])
656 663 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
657 664 repo_index.add(ie)
658 665
659 666 for pathspec in removed:
660 667 repo_index.remove(pathspec)
661 668
662 669 # Write changes to the index
663 670 repo_index.write()
664 671
665 672 # Create a tree from the updated index
666 673 written_commit_tree = repo_index.write_tree()
667 674
668 675 new_tree_id = written_commit_tree
669 676
670 677 author = commit_data['author']
671 678 committer = commit_data['committer']
672 679 message = commit_data['message']
673 680
674 681 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
675 682
676 683 new_commit_id = self.create_commit(wire, author, committer, message, branch,
677 684 new_tree_id, date_args=date_args, parents=commit_parents)
678 685
679 686 # libgit2, ensure the branch is there and exists
680 687 self.create_branch(wire, branch, new_commit_id)
681 688
682 689 # libgit2, set new ref to this created commit
683 690 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
684 691
685 692 return new_commit_id
686 693
687 694 @reraise_safe_exceptions
688 695 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
689 696 if url != 'default' and '://' not in url:
690 697 client = LocalGitClient(url)
691 698 else:
692 699 url_obj = url_parser(safe_bytes(url))
693 700 o = self._build_opener(url)
694 701 url = url_obj.authinfo()[0]
695 702 client = HttpGitClient(base_url=url, opener=o)
696 703 repo = self._factory.repo(wire)
697 704
698 705 determine_wants = repo.object_store.determine_wants_all
699 706
700 707 if refs:
701 708 refs: list[bytes] = [ascii_bytes(x) for x in refs]
702 709
703 710 def determine_wants_requested(_remote_refs):
704 711 determined = []
705 712 for ref_name, ref_hash in _remote_refs.items():
706 713 bytes_ref_name = safe_bytes(ref_name)
707 714
708 715 if bytes_ref_name in refs:
709 716 bytes_ref_hash = safe_bytes(ref_hash)
710 717 determined.append(bytes_ref_hash)
711 718 return determined
712 719
713 720 # swap with our custom requested wants
714 721 determine_wants = determine_wants_requested
715 722
716 723 try:
717 724 remote_refs = client.fetch(
718 725 path=url, target=repo, determine_wants=determine_wants)
719 726
720 727 except NotGitRepository as e:
721 728 log.warning(
722 729 'Trying to fetch from "%s" failed, not a Git repository.', url)
723 730 # Exception can contain unicode which we convert
724 731 raise exceptions.AbortException(e)(repr(e))
725 732
726 733 # mikhail: client.fetch() returns all the remote refs, but fetches only
727 734 # refs filtered by `determine_wants` function. We need to filter result
728 735 # as well
729 736 if refs:
730 737 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
731 738
732 739 if apply_refs:
733 740 # TODO: johbo: Needs proper test coverage with a git repository
734 741 # that contains a tag object, so that we would end up with
735 742 # a peeled ref at this point.
736 743 for k in remote_refs:
737 744 if k.endswith(PEELED_REF_MARKER):
738 745 log.debug("Skipping peeled reference %s", k)
739 746 continue
740 747 repo[k] = remote_refs[k]
741 748
742 749 if refs and not update_after:
743 750 # update to ref
744 751 # mikhail: explicitly set the head to the last ref.
745 752 update_to_ref = refs[-1]
746 753 if isinstance(update_after, str):
747 754 update_to_ref = update_after
748 755
749 756 repo[HEAD_MARKER] = remote_refs[update_to_ref]
750 757
751 758 if update_after:
752 759 # we want to check out HEAD
753 760 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
754 761 index.build_index_from_tree(repo.path, repo.index_path(),
755 762 repo.object_store, repo[HEAD_MARKER].tree)
756 763
757 764 if isinstance(remote_refs, FetchPackResult):
758 765 return remote_refs.refs
759 766 return remote_refs
760 767
761 768 @reraise_safe_exceptions
762 769 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
763 770 self._factory.repo(wire)
764 771 if refs and not isinstance(refs, (list, tuple)):
765 772 refs = [refs]
766 773
767 774 config = self._wire_to_config(wire)
768 775 # get all remote refs we'll use to fetch later
769 776 cmd = ['ls-remote']
770 777 if not all_refs:
771 778 cmd += ['--heads', '--tags']
772 779 cmd += [url]
773 780 output, __ = self.run_git_command(
774 781 wire, cmd, fail_on_stderr=False,
775 782 _copts=self._remote_conf(config),
776 783 extra_env={'GIT_TERMINAL_PROMPT': '0'})
777 784
778 785 remote_refs = collections.OrderedDict()
779 786 fetch_refs = []
780 787
781 788 for ref_line in output.splitlines():
782 789 sha, ref = ref_line.split(b'\t')
783 790 sha = sha.strip()
784 791 if ref in remote_refs:
785 792 # duplicate, skip
786 793 continue
787 794 if ref.endswith(PEELED_REF_MARKER):
788 795 log.debug("Skipping peeled reference %s", ref)
789 796 continue
790 797 # don't sync HEAD
791 798 if ref in [HEAD_MARKER]:
792 799 continue
793 800
794 801 remote_refs[ref] = sha
795 802
796 803 if refs and sha in refs:
797 804 # we filter fetch using our specified refs
798 805 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
799 806 elif not refs:
800 807 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
801 808 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
802 809
803 810 if fetch_refs:
804 811 for chunk in more_itertools.chunked(fetch_refs, 128):
805 812 fetch_refs_chunks = list(chunk)
806 813 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
807 814 self.run_git_command(
808 815 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
809 816 fail_on_stderr=False,
810 817 _copts=self._remote_conf(config),
811 818 extra_env={'GIT_TERMINAL_PROMPT': '0'})
812 819 if kwargs.get('sync_large_objects'):
813 820 self.run_git_command(
814 821 wire, ['lfs', 'fetch', url, '--all'],
815 822 fail_on_stderr=False,
816 823 _copts=self._remote_conf(config),
817 824 )
818 825
819 826 return remote_refs
820 827
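# Illustrative sketch (not part of this changeset) of the batching pattern used
# in sync_fetch above: more_itertools.chunked caps each `git fetch` invocation
# at 128 refspecs so the argument list stays within OS limits. The refspec
# names below are hypothetical.
import more_itertools

fetch_refs = [f'refs/heads/b{i}:refs/heads/b{i}' for i in range(300)]
for chunk in more_itertools.chunked(fetch_refs, 128):
    fetch_refs_chunks = list(chunk)  # batches of 128, 128, then 44
    print(f'git fetch <url> --force --prune -- ... ({len(fetch_refs_chunks)} refspecs)')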
821 828 @reraise_safe_exceptions
822 829 def sync_push(self, wire, url, refs=None, **kwargs):
823 830 if not self.check_url(url, wire):
824 831 return
825 832 config = self._wire_to_config(wire)
826 833 self._factory.repo(wire)
827 834 self.run_git_command(
828 835 wire, ['push', url, '--mirror'], fail_on_stderr=False,
829 836 _copts=self._remote_conf(config),
830 837 extra_env={'GIT_TERMINAL_PROMPT': '0'})
831 838 if kwargs.get('sync_large_objects'):
832 839 self.run_git_command(
833 840 wire, ['lfs', 'push', url, '--all'],
834 841 fail_on_stderr=False,
835 842 _copts=self._remote_conf(config),
836 843 )
837 844
838 845 @reraise_safe_exceptions
839 846 def get_remote_refs(self, wire, url):
840 847 repo = Repo(url)
841 848 return repo.get_refs()
842 849
843 850 @reraise_safe_exceptions
844 851 def get_description(self, wire):
845 852 repo = self._factory.repo(wire)
846 853 return repo.get_description()
847 854
848 855 @reraise_safe_exceptions
849 856 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
850 857 origin_repo_path = wire['path']
851 858 repo = self._factory.repo(wire)
852 859 # fetch from other_repo_path to our origin repo
853 860 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
854 861
855 862 wire_remote = wire.copy()
856 863 wire_remote['path'] = other_repo_path
857 864 repo_remote = self._factory.repo(wire_remote)
858 865
859 866 # fetch from origin_repo_path to our remote repo
860 867 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
861 868
862 869 revs = [
863 870 x.commit.id
864 871 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
865 872 return revs
866 873
867 874 @reraise_safe_exceptions
868 875 def get_object(self, wire, sha, maybe_unreachable=False):
869 876 cache_on, context_uid, repo_id = self._cache_on(wire)
870 877 region = self._region(wire)
871 878
872 879 @region.conditional_cache_on_arguments(condition=cache_on)
873 880 def _get_object(_context_uid, _repo_id, _sha):
874 881 repo_init = self._factory.repo_libgit2(wire)
875 882 with repo_init as repo:
876 883
877 884 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
878 885 try:
879 886 commit = repo.revparse_single(sha)
880 887 except KeyError:
881 888 # NOTE(marcink): KeyError doesn't give us any meaningful information
882 889 # here, so we raise something more explicit instead
883 890 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
884 891 raise exceptions.LookupException(e)(missing_commit_err)
885 892 except ValueError as e:
886 893 raise exceptions.LookupException(e)(missing_commit_err)
887 894
888 895 is_tag = False
889 896 if isinstance(commit, pygit2.Tag):
890 897 commit = repo.get(commit.target)
891 898 is_tag = True
892 899
893 900 check_dangling = True
894 901 if is_tag:
895 902 check_dangling = False
896 903
897 904 if check_dangling and maybe_unreachable:
898 905 check_dangling = False
899 906
900 907 # if we looked up a reference and it resolved, we're not dealing with a dangling commit
901 908 if sha != commit.hex:
902 909 check_dangling = False
903 910
904 911 if check_dangling:
905 912 # check for dangling commit
906 913 for branch in repo.branches.with_commit(commit.hex):
907 914 if branch:
908 915 break
909 916 else:
910 917 # NOTE(marcink): an empty error doesn't give us any meaningful information
911 918 # here, so we raise something more explicit instead
912 919 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
913 920 raise exceptions.LookupException(e)(missing_commit_err)
914 921
915 922 commit_id = commit.hex
916 923 type_str = commit.type_str
917 924
918 925 return {
919 926 'id': commit_id,
920 927 'type': type_str,
921 928 'commit_id': commit_id,
922 929 'idx': 0
923 930 }
924 931
925 932 return _get_object(context_uid, repo_id, sha)
926 933
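# Illustrative sketch of the for/else dangling-commit check in _get_object:
# if no local branch contains the commit, the else clause fires and the
# lookup is rejected. The repository path and revision here are hypothetical.
import pygit2

repo = pygit2.Repository('/path/to/repo.git')
commit = repo.revparse_single('HEAD')
for branch in repo.branches.with_commit(commit.hex):
    if branch:
        break  # commit is reachable from at least one branch
else:
    raise LookupError(f'SHA: {commit.hex} not found in branches')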
927 934 @reraise_safe_exceptions
928 935 def get_refs(self, wire):
929 936 cache_on, context_uid, repo_id = self._cache_on(wire)
930 937 region = self._region(wire)
931 938
932 939 @region.conditional_cache_on_arguments(condition=cache_on)
933 940 def _get_refs(_context_uid, _repo_id):
934 941
935 942 repo_init = self._factory.repo_libgit2(wire)
936 943 with repo_init as repo:
937 944 regex = re.compile('^refs/(heads|tags)/')
938 945 return {x.name: x.target.hex for x in
939 946 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
940 947
941 948 return _get_refs(context_uid, repo_id)
942 949
943 950 @reraise_safe_exceptions
944 951 def get_branch_pointers(self, wire):
945 952 cache_on, context_uid, repo_id = self._cache_on(wire)
946 953 region = self._region(wire)
947 954
948 955 @region.conditional_cache_on_arguments(condition=cache_on)
949 956 def _get_branch_pointers(_context_uid, _repo_id):
950 957
951 958 repo_init = self._factory.repo_libgit2(wire)
952 959 regex = re.compile('^refs/heads')
953 960 with repo_init as repo:
954 961 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
955 962 return {x.target.hex: x.shorthand for x in branches}
956 963
957 964 return _get_branch_pointers(context_uid, repo_id)
958 965
959 966 @reraise_safe_exceptions
960 967 def head(self, wire, show_exc=True):
961 968 cache_on, context_uid, repo_id = self._cache_on(wire)
962 969 region = self._region(wire)
963 970
964 971 @region.conditional_cache_on_arguments(condition=cache_on)
965 972 def _head(_context_uid, _repo_id, _show_exc):
966 973 repo_init = self._factory.repo_libgit2(wire)
967 974 with repo_init as repo:
968 975 try:
969 976 return repo.head.peel().hex
970 977 except Exception:
971 978 if show_exc:
972 979 raise
973 980 return _head(context_uid, repo_id, show_exc)
974 981
975 982 @reraise_safe_exceptions
976 983 def init(self, wire):
977 984 repo_path = safe_str(wire['path'])
978 985 os.makedirs(repo_path, mode=0o755)
979 986 pygit2.init_repository(repo_path, bare=False)
980 987
981 988 @reraise_safe_exceptions
982 989 def init_bare(self, wire):
983 990 repo_path = safe_str(wire['path'])
984 991 os.makedirs(repo_path, mode=0o755)
985 992 pygit2.init_repository(repo_path, bare=True)
986 993
987 994 @reraise_safe_exceptions
988 995 def revision(self, wire, rev):
989 996
990 997 cache_on, context_uid, repo_id = self._cache_on(wire)
991 998 region = self._region(wire)
992 999
993 1000 @region.conditional_cache_on_arguments(condition=cache_on)
994 1001 def _revision(_context_uid, _repo_id, _rev):
995 1002 repo_init = self._factory.repo_libgit2(wire)
996 1003 with repo_init as repo:
997 1004 commit = repo[rev]
998 1005 obj_data = {
999 1006 'id': commit.id.hex,
1000 1007 }
1001 1008 # tree objects themselves don't have a tree_id attribute
1002 1009 if hasattr(commit, 'tree_id'):
1003 1010 obj_data['tree'] = commit.tree_id.hex
1004 1011
1005 1012 return obj_data
1006 1013 return _revision(context_uid, repo_id, rev)
1007 1014
1008 1015 @reraise_safe_exceptions
1009 1016 def date(self, wire, commit_id):
1010 1017 cache_on, context_uid, repo_id = self._cache_on(wire)
1011 1018 region = self._region(wire)
1012 1019
1013 1020 @region.conditional_cache_on_arguments(condition=cache_on)
1014 1021 def _date(_repo_id, _commit_id):
1015 1022 repo_init = self._factory.repo_libgit2(wire)
1016 1023 with repo_init as repo:
1017 1024 commit = repo[commit_id]
1018 1025
1019 1026 if hasattr(commit, 'commit_time'):
1020 1027 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1021 1028 else:
1022 1029 commit = commit.get_object()
1023 1030 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1024 1031
1025 1032 # TODO(marcink): check dulwich difference of offset vs timezone
1026 1033 return [commit_time, commit_time_offset]
1027 1034 return _date(repo_id, commit_id)
1028 1035
1029 1036 @reraise_safe_exceptions
1030 1037 def author(self, wire, commit_id):
1031 1038 cache_on, context_uid, repo_id = self._cache_on(wire)
1032 1039 region = self._region(wire)
1033 1040
1034 1041 @region.conditional_cache_on_arguments(condition=cache_on)
1035 1042 def _author(_repo_id, _commit_id):
1036 1043 repo_init = self._factory.repo_libgit2(wire)
1037 1044 with repo_init as repo:
1038 1045 commit = repo[commit_id]
1039 1046
1040 1047 if hasattr(commit, 'author'):
1041 1048 author = commit.author
1042 1049 else:
1043 1050 author = commit.get_object().author
1044 1051
1045 1052 if author.email:
1046 1053 return f"{author.name} <{author.email}>"
1047 1054
1048 1055 try:
1049 1056 return f"{author.name}"
1050 1057 except Exception:
1051 1058 return f"{safe_str(author.raw_name)}"
1052 1059
1053 1060 return _author(repo_id, commit_id)
1054 1061
1055 1062 @reraise_safe_exceptions
1056 1063 def message(self, wire, commit_id):
1057 1064 cache_on, context_uid, repo_id = self._cache_on(wire)
1058 1065 region = self._region(wire)
1059 1066
1060 1067 @region.conditional_cache_on_arguments(condition=cache_on)
1061 1068 def _message(_repo_id, _commit_id):
1062 1069 repo_init = self._factory.repo_libgit2(wire)
1063 1070 with repo_init as repo:
1064 1071 commit = repo[commit_id]
1065 1072 return commit.message
1066 1073 return _message(repo_id, commit_id)
1067 1074
1068 1075 @reraise_safe_exceptions
1069 1076 def parents(self, wire, commit_id):
1070 1077 cache_on, context_uid, repo_id = self._cache_on(wire)
1071 1078 region = self._region(wire)
1072 1079
1073 1080 @region.conditional_cache_on_arguments(condition=cache_on)
1074 1081 def _parents(_repo_id, _commit_id):
1075 1082 repo_init = self._factory.repo_libgit2(wire)
1076 1083 with repo_init as repo:
1077 1084 commit = repo[commit_id]
1078 1085 if hasattr(commit, 'parent_ids'):
1079 1086 parent_ids = commit.parent_ids
1080 1087 else:
1081 1088 parent_ids = commit.get_object().parent_ids
1082 1089
1083 1090 return [x.hex for x in parent_ids]
1084 1091 return _parents(repo_id, commit_id)
1085 1092
1086 1093 @reraise_safe_exceptions
1087 1094 def children(self, wire, commit_id):
1088 1095 cache_on, context_uid, repo_id = self._cache_on(wire)
1089 1096 region = self._region(wire)
1090 1097
1091 1098 head = self.head(wire)
1092 1099
1093 1100 @region.conditional_cache_on_arguments(condition=cache_on)
1094 1101 def _children(_repo_id, _commit_id):
1095 1102
1096 1103 output, __ = self.run_git_command(
1097 1104 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
1098 1105
1099 1106 child_ids = []
1100 1107 pat = re.compile(fr'^{commit_id}')
1101 1108 for line in output.splitlines():
1102 1109 line = safe_str(line)
1103 1110 if pat.match(line):
1104 1111 found_ids = line.split(' ')[1:]
1105 1112 child_ids.extend(found_ids)
1106 1113 break
1107 1114
1108 1115 return child_ids
1109 1116 return _children(repo_id, commit_id)
1110 1117
1111 1118 @reraise_safe_exceptions
1112 1119 def set_refs(self, wire, key, value):
1113 1120 repo_init = self._factory.repo_libgit2(wire)
1114 1121 with repo_init as repo:
1115 1122 repo.references.create(key, value, force=True)
1116 1123
1117 1124 @reraise_safe_exceptions
1118 1125 def update_refs(self, wire, key, value):
1119 1126 repo_init = self._factory.repo_libgit2(wire)
1120 1127 with repo_init as repo:
1121 1128 if key not in repo.references:
1122 1129 raise ValueError(f'Reference {key} not found in the repository')
1123 1130 repo.references.create(key, value, force=True)
1124 1131
1125 1132 @reraise_safe_exceptions
1126 1133 def create_branch(self, wire, branch_name, commit_id, force=False):
1127 1134 repo_init = self._factory.repo_libgit2(wire)
1128 1135 with repo_init as repo:
1129 1136 if commit_id:
1130 1137 commit = repo[commit_id]
1131 1138 else:
1132 1139 # if commit is not given just use the HEAD
1133 1140 commit = repo.head()
1134 1141
1135 1142 if force:
1136 1143 repo.branches.local.create(branch_name, commit, force=force)
1137 1144 elif not repo.branches.get(branch_name):
1138 1145 # create only if that branch doesn't already exist
1139 1146 repo.branches.local.create(branch_name, commit, force=force)
1140 1147
1141 1148 @reraise_safe_exceptions
1142 1149 def remove_ref(self, wire, key):
1143 1150 repo_init = self._factory.repo_libgit2(wire)
1144 1151 with repo_init as repo:
1145 1152 repo.references.delete(key)
1146 1153
1147 1154 @reraise_safe_exceptions
1148 1155 def tag_remove(self, wire, tag_name):
1149 1156 repo_init = self._factory.repo_libgit2(wire)
1150 1157 with repo_init as repo:
1151 1158 key = f'refs/tags/{tag_name}'
1152 1159 repo.references.delete(key)
1153 1160
1154 1161 @reraise_safe_exceptions
1155 1162 def tree_changes(self, wire, source_id, target_id):
1156 1163 repo = self._factory.repo(wire)
1157 1164 # source can be empty
1158 1165 source_id = safe_bytes(source_id if source_id else b'')
1159 1166 target_id = safe_bytes(target_id)
1160 1167
1161 1168 source = repo[source_id].tree if source_id else None
1162 1169 target = repo[target_id].tree
1163 1170 result = repo.object_store.tree_changes(source, target)
1164 1171
1165 1172 added = set()
1166 1173 modified = set()
1167 1174 deleted = set()
1168 1175 for (old_path, new_path), (_, _), (_, _) in list(result):
1169 1176 if new_path and old_path:
1170 1177 modified.add(new_path)
1171 1178 elif new_path and not old_path:
1172 1179 added.add(new_path)
1173 1180 elif not new_path and old_path:
1174 1181 deleted.add(old_path)
1175 1182
1176 1183 return list(added), list(modified), list(deleted)
1177 1184
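# Minimal runnable sketch of the classification in tree_changes above, using
# hypothetical change tuples shaped like dulwich's object_store.tree_changes
# output: ((old_path, new_path), (old_mode, new_mode), (old_sha, new_sha)).
changes = [
    ((None, b'new.txt'), (None, 0o100644), (None, b'1' * 40)),              # added
    ((b'a.txt', b'a.txt'), (0o100644, 0o100644), (b'2' * 40, b'3' * 40)),   # modified
    ((b'gone.txt', None), (0o100644, None), (b'4' * 40, None)),             # deleted
]
added, modified, deleted = set(), set(), set()
for (old_path, new_path), (_, _), (_, _) in changes:
    if new_path and old_path:
        modified.add(new_path)
    elif new_path:
        added.add(new_path)
    elif old_path:
        deleted.add(old_path)
assert (added, modified, deleted) == ({b'new.txt'}, {b'a.txt'}, {b'gone.txt'})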
1178 1185 @reraise_safe_exceptions
1179 1186 def tree_and_type_for_path(self, wire, commit_id, path):
1180 1187
1181 1188 cache_on, context_uid, repo_id = self._cache_on(wire)
1182 1189 region = self._region(wire)
1183 1190
1184 1191 @region.conditional_cache_on_arguments(condition=cache_on)
1185 1192 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1186 1193 repo_init = self._factory.repo_libgit2(wire)
1187 1194
1188 1195 with repo_init as repo:
1189 1196 commit = repo[commit_id]
1190 1197 try:
1191 1198 tree = commit.tree[path]
1192 1199 except KeyError:
1193 1200 return None, None, None
1194 1201
1195 1202 return tree.id.hex, tree.type_str, tree.filemode
1196 1203 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1197 1204
1198 1205 @reraise_safe_exceptions
1199 1206 def tree_items(self, wire, tree_id):
1200 1207 cache_on, context_uid, repo_id = self._cache_on(wire)
1201 1208 region = self._region(wire)
1202 1209
1203 1210 @region.conditional_cache_on_arguments(condition=cache_on)
1204 1211 def _tree_items(_repo_id, _tree_id):
1205 1212
1206 1213 repo_init = self._factory.repo_libgit2(wire)
1207 1214 with repo_init as repo:
1208 1215 try:
1209 1216 tree = repo[tree_id]
1210 1217 except KeyError:
1211 1218 raise ObjectMissing(f'No tree with id: {tree_id}')
1212 1219
1213 1220 result = []
1214 1221 for item in tree:
1215 1222 item_sha = item.hex
1216 1223 item_mode = item.filemode
1217 1224 item_type = item.type_str
1218 1225
1219 1226 if item_type == 'commit':
1220 1227 # NOTE(marcink): we translate submodules to 'link' for backward compat
1221 1228 item_type = 'link'
1222 1229
1223 1230 result.append((item.name, item_mode, item_sha, item_type))
1224 1231 return result
1225 1232 return _tree_items(repo_id, tree_id)
1226 1233
1227 1234 @reraise_safe_exceptions
1228 1235 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1229 1236 """
1230 1237 Old version that uses subprocess to call diff
1231 1238 """
1232 1239
1233 1240 flags = [
1234 1241 f'-U{context}', '--patch',
1235 1242 '--binary',
1236 1243 '--find-renames',
1237 1244 '--no-indent-heuristic',
1238 1245 # '--indent-heuristic',
1239 1246 #'--full-index',
1240 1247 #'--abbrev=40'
1241 1248 ]
1242 1249
1243 1250 if opt_ignorews:
1244 1251 flags.append('--ignore-all-space')
1245 1252
1246 1253 if commit_id_1 == self.EMPTY_COMMIT:
1247 1254 cmd = ['show'] + flags + [commit_id_2]
1248 1255 else:
1249 1256 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1250 1257
1251 1258 if file_filter:
1252 1259 cmd.extend(['--', file_filter])
1253 1260
1254 1261 diff, __ = self.run_git_command(wire, cmd)
1255 1262 # If we used the 'show' command, strip the leading metadata lines
1256 1263 # (until the actual diff starts)
1257 1264 if commit_id_1 == self.EMPTY_COMMIT:
1258 1265 lines = diff.splitlines()
1259 1266 x = 0
1260 1267 for line in lines:
1261 1268 if line.startswith(b'diff'):
1262 1269 break
1263 1270 x += 1
1264 1271 # Append a trailing newline, just like the 'diff' command does
1265 1272 diff = b'\n'.join(lines[x:]) + b'\n'
1266 1273 return diff
1267 1274
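# Runnable sketch of the header-stripping step in diff_2: `git show` output
# begins with commit metadata, which is dropped up to the first line starting
# with b'diff'. The sample output below is hypothetical.
show_output = b'commit deadbeef\nAuthor: A <a@example.com>\n\ndiff --git a/f b/f\n--- a/f\n+++ b/f\n'
lines = show_output.splitlines()
x = 0
for line in lines:
    if line.startswith(b'diff'):
        break
    x += 1
diff = b'\n'.join(lines[x:]) + b'\n'
assert diff.startswith(b'diff --git')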
1268 1275 @reraise_safe_exceptions
1269 1276 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1270 1277 repo_init = self._factory.repo_libgit2(wire)
1271 1278
1272 1279 with repo_init as repo:
1273 1280 swap = True
1274 1281 flags = 0
1275 1282 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1276 1283
1277 1284 if opt_ignorews:
1278 1285 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1279 1286
1280 1287 if commit_id_1 == self.EMPTY_COMMIT:
1281 1288 comm1 = repo[commit_id_2]
1282 1289 diff_obj = comm1.tree.diff_to_tree(
1283 1290 flags=flags, context_lines=context, swap=swap)
1284 1291
1285 1292 else:
1286 1293 comm1 = repo[commit_id_2]
1287 1294 comm2 = repo[commit_id_1]
1288 1295 diff_obj = comm1.tree.diff_to_tree(
1289 1296 comm2.tree, flags=flags, context_lines=context, swap=swap)
1290 1297 similar_flags = 0
1291 1298 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1292 1299 diff_obj.find_similar(flags=similar_flags)
1293 1300
1294 1301 if file_filter:
1295 1302 for p in diff_obj:
1296 1303 if p.delta.old_file.path == file_filter:
1297 1304 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1298 1305 # no matching path == no diff
1299 1306 return BytesEnvelope(b'')
1300 1307
1301 1308 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1302 1309
1303 1310 @reraise_safe_exceptions
1304 1311 def node_history(self, wire, commit_id, path, limit):
1305 1312 cache_on, context_uid, repo_id = self._cache_on(wire)
1306 1313 region = self._region(wire)
1307 1314
1308 1315 @region.conditional_cache_on_arguments(condition=cache_on)
1309 1316 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1310 1317 # optimize for n==1, rev-list is much faster for that use-case
1311 1318 if limit == 1:
1312 1319 cmd = ['rev-list', '-1', commit_id, '--', path]
1313 1320 else:
1314 1321 cmd = ['log']
1315 1322 if limit:
1316 1323 cmd.extend(['-n', str(safe_int(limit, 0))])
1317 1324 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1318 1325
1319 1326 output, __ = self.run_git_command(wire, cmd)
1320 1327 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1321 1328
1322 1329 return [x for x in commit_ids]
1323 1330 return _node_history(context_uid, repo_id, commit_id, path, limit)
1324 1331
1325 1332 @reraise_safe_exceptions
1326 1333 def node_annotate_legacy(self, wire, commit_id, path):
1327 1334 # note: replaced by pygit2 implementation
1328 1335 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1329 1336 # -l ==> outputs long shas (and we need all 40 characters)
1330 1337 # --root ==> doesn't put '^' character for boundaries
1331 1338 # -r commit_id ==> blames for the given commit
1332 1339 output, __ = self.run_git_command(wire, cmd)
1333 1340
1334 1341 result = []
1335 1342 for i, blame_line in enumerate(output.splitlines()[:-1]):
1336 1343 line_no = i + 1
1337 1344 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1338 1345 result.append((line_no, blame_commit_id, line))
1339 1346
1340 1347 return result
1341 1348
1342 1349 @reraise_safe_exceptions
1343 1350 def node_annotate(self, wire, commit_id, path):
1344 1351
1345 1352 result_libgit = []
1346 1353 repo_init = self._factory.repo_libgit2(wire)
1347 1354 with repo_init as repo:
1348 1355 commit = repo[commit_id]
1349 1356 blame_obj = repo.blame(path, newest_commit=commit_id)
1350 1357 file_content = commit.tree[path].data
1351 1358 for i, line in enumerate(splitnewlines(file_content)):
1352 1359 line_no = i + 1
1353 1360 hunk = blame_obj.for_line(line_no)
1354 1361 blame_commit_id = hunk.final_commit_id.hex
1355 1362
1356 1363 result_libgit.append((line_no, blame_commit_id, line))
1357 1364
1358 1365 return BinaryEnvelope(result_libgit)
1359 1366
1360 1367 @reraise_safe_exceptions
1361 1368 def update_server_info(self, wire, force=False):
1362 1369 cmd = ['update-server-info']
1363 1370 if force:
1364 1371 cmd += ['--force']
1365 1372 output, __ = self.run_git_command(wire, cmd)
1366 1373 return output.splitlines()
1367 1374
1368 1375 @reraise_safe_exceptions
1369 1376 def get_all_commit_ids(self, wire):
1370 1377
1371 1378 cache_on, context_uid, repo_id = self._cache_on(wire)
1372 1379 region = self._region(wire)
1373 1380
1374 1381 @region.conditional_cache_on_arguments(condition=cache_on)
1375 1382 def _get_all_commit_ids(_context_uid, _repo_id):
1376 1383
1377 1384 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1378 1385 try:
1379 1386 output, __ = self.run_git_command(wire, cmd)
1380 1387 return output.splitlines()
1381 1388 except Exception:
1382 1389 # Can be raised for empty repositories
1383 1390 return []
1384 1391
1385 1392 @region.conditional_cache_on_arguments(condition=cache_on)
1386 1393 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1387 1394 repo_init = self._factory.repo_libgit2(wire)
1388 1395 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1389 1396 results = []
1390 1397 with repo_init as repo:
1391 1398 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1392 1399 results.append(commit.id.hex)
1393 1400 return results
1394 1401 return _get_all_commit_ids(context_uid, repo_id)
1395 1402
1396 1403 @reraise_safe_exceptions
1397 1404 def run_git_command(self, wire, cmd, **opts):
1398 1405 path = wire.get('path', None)
1399 1406 debug_mode = rhodecode.ConfigGet().get_bool('debug')
1400 1407
1401 1408 if path and os.path.isdir(path):
1402 1409 opts['cwd'] = path
1403 1410
1404 1411 if '_bare' in opts:
1405 1412 _copts = []
1406 1413 del opts['_bare']
1407 1414 else:
1408 1415 _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
1409 1416 safe_call = False
1410 1417 if '_safe' in opts:
1411 1418 # no exc on failure
1412 1419 del opts['_safe']
1413 1420 safe_call = True
1414 1421
1415 1422 if '_copts' in opts:
1416 1423 _copts.extend(opts['_copts'] or [])
1417 1424 del opts['_copts']
1418 1425
1419 1426 gitenv = os.environ.copy()
1420 1427 gitenv.update(opts.pop('extra_env', {}))
1421 1428 # we need to remove GIT_DIR from the environment
1422 1429 if 'GIT_DIR' in gitenv:
1423 1430 del gitenv['GIT_DIR']
1424 1431 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1425 1432 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1426 1433
1427 1434 cmd = [settings.GIT_EXECUTABLE()] + _copts + cmd
1428 1435 _opts = {'env': gitenv, 'shell': False}
1429 1436
1430 1437 proc = None
1431 1438 try:
1432 1439 _opts.update(opts)
1433 1440 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1434 1441
1435 1442 return b''.join(proc), b''.join(proc.stderr)
1436 1443 except OSError as err:
1437 1444 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1438 1445 call_opts = {}
1439 1446 if debug_mode:
1440 1447 call_opts = _opts
1441 1448
1442 1449 tb_err = ("Couldn't run git command ({}).\n"
1443 1450 "Original error was:{}\n"
1444 1451 "Call options:{}\n"
1445 1452 .format(cmd, err, call_opts))
1446 1453 log.exception(tb_err)
1447 1454 if safe_call:
1448 1455 return '', err
1449 1456 else:
1450 1457 raise exceptions.VcsException()(tb_err)
1451 1458 finally:
1452 1459 if proc:
1453 1460 proc.close()
1454 1461
1455 1462 @reraise_safe_exceptions
1456 1463 def install_hooks(self, wire, force=False):
1457 1464 from vcsserver.hook_utils import install_git_hooks
1458 1465 bare = self.bare(wire)
1459 1466 path = wire['path']
1460 1467 binary_dir = settings.BINARY_DIR
1461 1468 if binary_dir:
1462 1469 os.path.join(binary_dir, 'python3')
1463 1470 return install_git_hooks(path, bare, force_create=force)
1464 1471
1465 1472 @reraise_safe_exceptions
1466 1473 def get_hooks_info(self, wire):
1467 1474 from vcsserver.hook_utils import (
1468 1475 get_git_pre_hook_version, get_git_post_hook_version)
1469 1476 bare = self.bare(wire)
1470 1477 path = wire['path']
1471 1478 return {
1472 1479 'pre_version': get_git_pre_hook_version(path, bare),
1473 1480 'post_version': get_git_post_hook_version(path, bare),
1474 1481 }
1475 1482
1476 1483 @reraise_safe_exceptions
1477 1484 def set_head_ref(self, wire, head_name):
1478 1485 log.debug('Setting refs/heads to `%s`', head_name)
1479 1486 repo_init = self._factory.repo_libgit2(wire)
1480 1487 with repo_init as repo:
1481 1488 repo.set_head(f'refs/heads/{head_name}')
1482 1489
1483 1490 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1484 1491
1485 1492 @reraise_safe_exceptions
1486 1493 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1487 1494 archive_dir_name, commit_id, cache_config):
1488 1495
1489 1496 def file_walker(_commit_id, path):
1490 1497 repo_init = self._factory.repo_libgit2(wire)
1491 1498
1492 1499 with repo_init as repo:
1493 1500 commit = repo[commit_id]
1494 1501
1495 1502 if path in ['', '/']:
1496 1503 tree = commit.tree
1497 1504 else:
1498 1505 tree = commit.tree[path.rstrip('/')]
1499 1506 tree_id = tree.id.hex
1500 1507 try:
1501 1508 tree = repo[tree_id]
1502 1509 except KeyError:
1503 1510 raise ObjectMissing(f'No tree with id: {tree_id}')
1504 1511
1505 1512 index = LibGit2Index.Index()
1506 1513 index.read_tree(tree)
1507 1514 file_iter = index
1508 1515
1509 1516 for file_node in file_iter:
1510 1517 file_path = file_node.path
1511 1518 mode = file_node.mode
1512 1519 is_link = stat.S_ISLNK(mode)
1513 1520 if mode == pygit2.GIT_FILEMODE_COMMIT:
1514 1521 log.debug('Skipping path %s as a commit node', file_path)
1515 1522 continue
1516 1523 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1517 1524
1518 1525 return store_archive_in_cache(
1519 1526 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -1,1213 +1,1217 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import binascii
19 19 import io
20 20 import logging
21 21 import stat
22 22 import sys
23 23 import urllib.request
24 24 import urllib.parse
25 25 import hashlib
26 26
27 27 from hgext import largefiles, rebase
28 28
29 29 from mercurial import commands
30 30 from mercurial import unionrepo
31 31 from mercurial import verify
32 32 from mercurial import repair
33 33 from mercurial.error import AmbiguousPrefixLookupError
34 from mercurial.utils.urlutil import path as hg_path
34 35
35 36 import vcsserver
36 37 from vcsserver import exceptions
37 38 from vcsserver.base import (
38 39 RepoFactory,
39 40 obfuscate_qs,
40 41 raise_from_original,
41 42 store_archive_in_cache,
42 43 ArchiveNode,
43 44 BytesEnvelope,
44 45 BinaryEnvelope,
45 46 )
46 47 from vcsserver.hgcompat import (
47 48 archival,
48 49 bin,
49 50 clone,
50 51 config as hgconfig,
51 52 diffopts,
52 53 hex,
53 54 get_ctx,
54 55 hg_url as url_parser,
55 56 httpbasicauthhandler,
56 57 httpdigestauthhandler,
57 makepeer,
58 make_peer,
58 59 instance,
59 60 match,
60 61 memctx,
61 62 exchange,
62 63 memfilectx,
63 64 nullrev,
64 65 hg_merge,
65 66 patch,
66 67 peer,
67 68 revrange,
68 69 ui,
69 70 hg_tag,
70 71 Abort,
71 72 LookupError,
72 73 RepoError,
73 74 RepoLookupError,
74 75 InterventionRequired,
75 76 RequirementError,
76 77 alwaysmatcher,
77 78 patternmatcher,
78 79 hgext_strip,
79 80 )
80 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
81 from vcsserver.lib.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
81 82 from vcsserver.vcs_base import RemoteBase
82 83 from vcsserver.config import hooks as hooks_config
83 84 from vcsserver.lib.exc_tracking import format_exc
84 85
85 86 log = logging.getLogger(__name__)
86 87
87 88
88 def make_ui_from_config(repo_config):
89 def make_ui_from_config(repo_config, interactive=True):
89 90
90 91 class LoggingUI(ui.ui):
91 92
92 93 def status(self, *msg, **opts):
93 94 str_msg = map(safe_str, msg)
94 95 log.info(' '.join(str_msg).rstrip('\n'))
95 96 #super(LoggingUI, self).status(*msg, **opts)
96 97
97 98 def warn(self, *msg, **opts):
98 99 str_msg = map(safe_str, msg)
99 100 log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
100 101 #super(LoggingUI, self).warn(*msg, **opts)
101 102
102 103 def error(self, *msg, **opts):
103 104 str_msg = map(safe_str, msg)
104 105 log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
105 106 #super(LoggingUI, self).error(*msg, **opts)
106 107
107 108 def note(self, *msg, **opts):
108 109 str_msg = map(safe_str, msg)
109 110 log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
110 111 #super(LoggingUI, self).note(*msg, **opts)
111 112
112 113 def debug(self, *msg, **opts):
113 114 str_msg = map(safe_str, msg)
114 115 log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
115 116 #super(LoggingUI, self).debug(*msg, **opts)
116 117
117 118 baseui = LoggingUI()
118 119
119 120 # clean the baseui object
120 121 baseui._ocfg = hgconfig.config()
121 122 baseui._ucfg = hgconfig.config()
122 123 baseui._tcfg = hgconfig.config()
123 124
124 125 for section, option, value in repo_config:
125 126 baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
126 127
127 128 # make our hgweb quiet so it doesn't print output
128 129 baseui.setconfig(b'ui', b'quiet', b'true')
129 130
130 131 baseui.setconfig(b'ui', b'paginate', b'never')
131 132 # for better error reporting from Mercurial
132 133 baseui.setconfig(b'ui', b'message-output', b'stderr')
133 134
134 135 # force mercurial to only use 1 thread, otherwise it may try to set a
135 136 # signal in a non-main thread, thus generating a ValueError.
136 137 baseui.setconfig(b'worker', b'numcpus', 1)
137 138
138 139 # If there is no config for the largefiles extension, we explicitly disable
139 140 # it here. This overrides settings from repositories hgrc file. Recent
140 141 # mercurial versions enable largefiles in hgrc on clone from largefile
141 142 # repo.
142 143 if not baseui.hasconfig(b'extensions', b'largefiles'):
143 144 log.debug('Explicitly disable largefiles extension for repo.')
144 145 baseui.setconfig(b'extensions', b'largefiles', b'!')
145 146
147 baseui.setconfig(b'ui', b'interactive', b'true' if interactive else b'false')
146 148 return baseui
147 149
148 150
149 151 def reraise_safe_exceptions(func):
150 152 """Decorator for converting mercurial exceptions to something neutral."""
151 153
152 154 def wrapper(*args, **kwargs):
153 155 try:
154 156 return func(*args, **kwargs)
155 157 except (Abort, InterventionRequired) as e:
156 158 raise_from_original(exceptions.AbortException(e), e)
157 159 except RepoLookupError as e:
158 160 raise_from_original(exceptions.LookupException(e), e)
159 161 except RequirementError as e:
160 162 raise_from_original(exceptions.RequirementException(e), e)
161 163 except RepoError as e:
162 164 raise_from_original(exceptions.VcsException(e), e)
163 165 except LookupError as e:
164 166 raise_from_original(exceptions.LookupException(e), e)
165 167 except Exception as e:
166 168 if not hasattr(e, '_vcs_kind'):
167 169 log.exception("Unhandled exception in hg remote call")
168 170 raise_from_original(exceptions.UnhandledException(e), e)
169 171
170 172 raise
171 173 return wrapper
172 174
173 175
174 176 class MercurialFactory(RepoFactory):
175 177 repo_type = 'hg'
176 178
177 179 def _create_config(self, config, hooks=True):
178 180 if not hooks:
179 181
180 182 hooks_to_clean = {
181 183
182 184 hooks_config.HOOK_REPO_SIZE,
183 185 hooks_config.HOOK_PRE_PULL,
184 186 hooks_config.HOOK_PULL,
185 187
186 188 hooks_config.HOOK_PRE_PUSH,
187 189 # TODO: what about PRETXT, this was disabled in pre 5.0.0
188 190 hooks_config.HOOK_PRETX_PUSH,
189 191
190 192 }
191 193 new_config = []
192 194 for section, option, value in config:
193 195 if section == 'hooks' and option in hooks_to_clean:
194 196 continue
195 197 new_config.append((section, option, value))
196 198 config = new_config
197 199
198 200 baseui = make_ui_from_config(config)
199 201 return baseui
200 202
201 203 def _create_repo(self, wire, create):
202 204 baseui = self._create_config(wire["config"])
203 205 repo = instance(baseui, safe_bytes(wire["path"]), create)
204 206 log.debug('repository created: got HG object: %s', repo)
205 207 return repo
206 208
207 209 def repo(self, wire, create=False):
208 210 """
209 211 Get a repository instance for the given path.
210 212 """
211 213 return self._create_repo(wire, create)
212 214
213 215
214 216 def patch_ui_message_output(baseui):
215 217 baseui.setconfig(b'ui', b'quiet', b'false')
216 218 output = io.BytesIO()
217 219
218 220 def write(data, **unused_kwargs):
219 221 output.write(data)
220 222
221 223 baseui.status = write
222 224 baseui.write = write
223 225 baseui.warn = write
224 226 baseui.debug = write
225 227
226 228 return baseui, output
227 229
228 230
229 231 def get_obfuscated_url(url_obj):
230 232 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
231 233 url_obj.query = obfuscate_qs(url_obj.query)
232 234 obfuscated_uri = str(url_obj)
233 235 return obfuscated_uri
234 236
235 237
236 238 def normalize_url_for_hg(url: str):
237 239 _proto = None
238 240
239 241 if '+' in url[:url.find('://')]:
240 242 _proto = url[0:url.find('+')]
241 243 url = url[url.find('+') + 1:]
242 244 return url, _proto
243 245
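# Usage sketch for normalize_url_for_hg: a transport prefix such as `svn+`
# is split off and returned separately, while plain URLs pass through.
assert normalize_url_for_hg('svn+http://host/repo') == ('http://host/repo', 'svn')
assert normalize_url_for_hg('http://host/repo') == ('http://host/repo', None)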
244 246
245 247 class HgRemote(RemoteBase):
246 248
247 249 def __init__(self, factory):
248 250 self._factory = factory
249 251 self._bulk_methods = {
250 252 "affected_files": self.ctx_files,
251 253 "author": self.ctx_user,
252 254 "branch": self.ctx_branch,
253 255 "children": self.ctx_children,
254 256 "date": self.ctx_date,
255 257 "message": self.ctx_description,
256 258 "parents": self.ctx_parents,
257 259 "status": self.ctx_status,
258 260 "obsolete": self.ctx_obsolete,
259 261 "phase": self.ctx_phase,
260 262 "hidden": self.ctx_hidden,
261 263 "_file_paths": self.ctx_list,
262 264 }
263 265 self._bulk_file_methods = {
264 266 "size": self.fctx_size,
265 267 "data": self.fctx_node_data,
266 268 "flags": self.fctx_flags,
267 269 "is_binary": self.is_binary,
268 270 "md5": self.md5_hash,
269 271 }
270 272
271 273 def _get_ctx(self, repo, ref):
272 274 return get_ctx(repo, ref)
273 275
274 276 @reraise_safe_exceptions
275 277 def discover_hg_version(self):
276 278 from mercurial import util
277 279 return safe_str(util.version())
278 280
279 281 @reraise_safe_exceptions
280 282 def is_empty(self, wire):
281 283 repo = self._factory.repo(wire)
282 284
283 285 try:
284 286 return len(repo) == 0
285 287 except Exception:
286 288 log.exception("failed to read object_store")
287 289 return False
288 290
289 291 @reraise_safe_exceptions
290 292 def bookmarks(self, wire):
291 293 cache_on, context_uid, repo_id = self._cache_on(wire)
292 294 region = self._region(wire)
293 295
294 296 @region.conditional_cache_on_arguments(condition=cache_on)
295 297 def _bookmarks(_context_uid, _repo_id):
296 298 repo = self._factory.repo(wire)
297 299 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
298 300
299 301 return _bookmarks(context_uid, repo_id)
300 302
301 303 @reraise_safe_exceptions
302 304 def branches(self, wire, normal, closed):
303 305 cache_on, context_uid, repo_id = self._cache_on(wire)
304 306 region = self._region(wire)
305 307
306 308 @region.conditional_cache_on_arguments(condition=cache_on)
307 309 def _branches(_context_uid, _repo_id, _normal, _closed):
308 310 repo = self._factory.repo(wire)
309 311 iter_branches = repo.branchmap().iterbranches()
310 312 bt = {}
311 313 for branch_name, _heads, tip_node, is_closed in iter_branches:
312 314 if normal and not is_closed:
313 315 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
314 316 if closed and is_closed:
315 317 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
316 318
317 319 return bt
318 320
319 321 return _branches(context_uid, repo_id, normal, closed)
320 322
321 323 @reraise_safe_exceptions
322 324 def bulk_request(self, wire, commit_id, pre_load):
323 325 cache_on, context_uid, repo_id = self._cache_on(wire)
324 326 region = self._region(wire)
325 327
326 328 @region.conditional_cache_on_arguments(condition=cache_on)
327 329 def _bulk_request(_repo_id, _commit_id, _pre_load):
328 330 result = {}
329 331 for attr in pre_load:
330 332 try:
331 333 method = self._bulk_methods[attr]
332 334 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
333 335 result[attr] = method(wire, commit_id)
334 336 except KeyError as e:
335 337 raise exceptions.VcsException(e)(
336 338 f'Unknown bulk attribute: "{attr}"')
337 339 return result
338 340
339 341 return _bulk_request(repo_id, commit_id, sorted(pre_load))
340 342
341 343 @reraise_safe_exceptions
342 344 def ctx_branch(self, wire, commit_id):
343 345 cache_on, context_uid, repo_id = self._cache_on(wire)
344 346 region = self._region(wire)
345 347
346 348 @region.conditional_cache_on_arguments(condition=cache_on)
347 349 def _ctx_branch(_repo_id, _commit_id):
348 350 repo = self._factory.repo(wire)
349 351 ctx = self._get_ctx(repo, commit_id)
350 352 return ctx.branch()
351 353 return _ctx_branch(repo_id, commit_id)
352 354
353 355 @reraise_safe_exceptions
354 356 def ctx_date(self, wire, commit_id):
355 357 cache_on, context_uid, repo_id = self._cache_on(wire)
356 358 region = self._region(wire)
357 359
358 360 @region.conditional_cache_on_arguments(condition=cache_on)
359 361 def _ctx_date(_repo_id, _commit_id):
360 362 repo = self._factory.repo(wire)
361 363 ctx = self._get_ctx(repo, commit_id)
362 364 return ctx.date()
363 365 return _ctx_date(repo_id, commit_id)
364 366
365 367 @reraise_safe_exceptions
366 368 def ctx_description(self, wire, revision):
367 369 repo = self._factory.repo(wire)
368 370 ctx = self._get_ctx(repo, revision)
369 371 return ctx.description()
370 372
371 373 @reraise_safe_exceptions
372 374 def ctx_files(self, wire, commit_id):
373 375 cache_on, context_uid, repo_id = self._cache_on(wire)
374 376 region = self._region(wire)
375 377
376 378 @region.conditional_cache_on_arguments(condition=cache_on)
377 379 def _ctx_files(_repo_id, _commit_id):
378 380 repo = self._factory.repo(wire)
379 381 ctx = self._get_ctx(repo, commit_id)
380 382 return ctx.files()
381 383
382 384 return _ctx_files(repo_id, commit_id)
383 385
384 386 @reraise_safe_exceptions
385 387 def ctx_list(self, path, revision):
386 388 repo = self._factory.repo(path)
387 389 ctx = self._get_ctx(repo, revision)
388 390 return list(ctx)
389 391
390 392 @reraise_safe_exceptions
391 393 def ctx_parents(self, wire, commit_id):
392 394 cache_on, context_uid, repo_id = self._cache_on(wire)
393 395 region = self._region(wire)
394 396
395 397 @region.conditional_cache_on_arguments(condition=cache_on)
396 398 def _ctx_parents(_repo_id, _commit_id):
397 399 repo = self._factory.repo(wire)
398 400 ctx = self._get_ctx(repo, commit_id)
399 401 return [parent.hex() for parent in ctx.parents()
400 402 if not (parent.hidden() or parent.obsolete())]
401 403
402 404 return _ctx_parents(repo_id, commit_id)
403 405
404 406 @reraise_safe_exceptions
405 407 def ctx_children(self, wire, commit_id):
406 408 cache_on, context_uid, repo_id = self._cache_on(wire)
407 409 region = self._region(wire)
408 410
409 411 @region.conditional_cache_on_arguments(condition=cache_on)
410 412 def _ctx_children(_repo_id, _commit_id):
411 413 repo = self._factory.repo(wire)
412 414 ctx = self._get_ctx(repo, commit_id)
413 415 return [child.hex() for child in ctx.children()
414 416 if not (child.hidden() or child.obsolete())]
415 417
416 418 return _ctx_children(repo_id, commit_id)
417 419
418 420 @reraise_safe_exceptions
419 421 def ctx_phase(self, wire, commit_id):
420 422 cache_on, context_uid, repo_id = self._cache_on(wire)
421 423 region = self._region(wire)
422 424
423 425 @region.conditional_cache_on_arguments(condition=cache_on)
424 426 def _ctx_phase(_context_uid, _repo_id, _commit_id):
425 427 repo = self._factory.repo(wire)
426 428 ctx = self._get_ctx(repo, commit_id)
427 429 # public=0, draft=1, secret=2
428 430 return ctx.phase()
429 431 return _ctx_phase(context_uid, repo_id, commit_id)
430 432
431 433 @reraise_safe_exceptions
432 434 def ctx_obsolete(self, wire, commit_id):
433 435 cache_on, context_uid, repo_id = self._cache_on(wire)
434 436 region = self._region(wire)
435 437
436 438 @region.conditional_cache_on_arguments(condition=cache_on)
437 439 def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
438 440 repo = self._factory.repo(wire)
439 441 ctx = self._get_ctx(repo, commit_id)
440 442 return ctx.obsolete()
441 443 return _ctx_obsolete(context_uid, repo_id, commit_id)
442 444
443 445 @reraise_safe_exceptions
444 446 def ctx_hidden(self, wire, commit_id):
445 447 cache_on, context_uid, repo_id = self._cache_on(wire)
446 448 region = self._region(wire)
447 449
448 450 @region.conditional_cache_on_arguments(condition=cache_on)
449 451 def _ctx_hidden(_context_uid, _repo_id, _commit_id):
450 452 repo = self._factory.repo(wire)
451 453 ctx = self._get_ctx(repo, commit_id)
452 454 return ctx.hidden()
453 455 return _ctx_hidden(context_uid, repo_id, commit_id)
454 456
455 457 @reraise_safe_exceptions
456 458 def ctx_substate(self, wire, revision):
457 459 repo = self._factory.repo(wire)
458 460 ctx = self._get_ctx(repo, revision)
459 461 return ctx.substate
460 462
461 463 @reraise_safe_exceptions
462 464 def ctx_status(self, wire, revision):
463 465 repo = self._factory.repo(wire)
464 466 ctx = self._get_ctx(repo, revision)
465 467 status = repo[ctx.p1().node()].status(other=ctx.node())
466 468 # the status object (an odd, custom named tuple in mercurial) is not
467 469 # correctly serializable; we make it a list, as the underlying
468 470 # API expects this to be a list
469 471 return list(status)
470 472
471 473 @reraise_safe_exceptions
472 474 def ctx_user(self, wire, revision):
473 475 repo = self._factory.repo(wire)
474 476 ctx = self._get_ctx(repo, revision)
475 477 return ctx.user()
476 478
477 479 @reraise_safe_exceptions
478 480 def check_url(self, url, config):
479 481 url, _proto = normalize_url_for_hg(url)
480 482 url_obj = url_parser(safe_bytes(url))
481 483
482 484 test_uri = safe_str(url_obj.authinfo()[0])
483 485 authinfo = url_obj.authinfo()[1]
484 486 obfuscated_uri = get_obfuscated_url(url_obj)
485 487 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
486 488
487 489 handlers = []
488 490 if authinfo:
489 491 # create a password manager
490 492 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
491 493 passmgr.add_password(*convert_to_str(authinfo))
492 494
493 495 handlers.extend((httpbasicauthhandler(passmgr),
494 496 httpdigestauthhandler(passmgr)))
495 497
496 498 o = urllib.request.build_opener(*handlers)
497 499 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
498 500 ('Accept', 'application/mercurial-0.1')]
499 501
500 502 q = {"cmd": 'between'}
501 503 q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
502 504 qs = f'?{urllib.parse.urlencode(q)}'
503 505 cu = f"{test_uri}{qs}"
504 506
505 507 try:
506 508 req = urllib.request.Request(cu, None, {})
507 509 log.debug("Trying to open URL %s", obfuscated_uri)
508 510 resp = o.open(req)
509 511 if resp.code != 200:
510 512 raise exceptions.URLError()('Return Code is not 200')
511 513 except Exception as e:
512 514 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
513 515 # means it cannot be cloned
514 516 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
515 517
516 518 # now check if it's a proper hg repo, but don't do it for svn
517 519 try:
518 520 if _proto == 'svn':
519 521 pass
520 522 else:
521 523 # check for pure hg repos
522 524 log.debug(
523 525 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
524 ui = make_ui_from_config(config)
525 peer_checker = makepeer(ui, safe_bytes(url))
526 # Create repo path with custom mercurial path object
527 ui = make_ui_from_config(config, interactive=False)
528 repo_path = hg_path(ui=ui, rawloc=safe_bytes(url))
529 peer_checker = make_peer(ui, repo_path, False)
526 530 peer_checker.lookup(b'tip')
527 531 except Exception as e:
528 532 log.warning("URL is not a valid Mercurial repository: %s",
529 533 obfuscated_uri)
530 534 raise exceptions.URLError(e)(
531 535 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
532 536
533 537 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
534 538 return True
535 539
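# Sketch of the capability-probe URL that check_url builds: a Mercurial
# `between` command with a pair of null revisions (the host is hypothetical).
import urllib.parse

q = {'cmd': 'between', 'pairs': '{}-{}'.format('0' * 40, '0' * 40)}
probe_url = f"https://host/repo?{urllib.parse.urlencode(q)}"
# a server answering 200 here speaks the Mercurial wire protocol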
536 540 @reraise_safe_exceptions
537 541 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_git, opt_ignorews, context):
538 542 repo = self._factory.repo(wire)
539 543
540 544 if file_filter:
541 545 # unpack the file-filter
542 546 repo_path, node_path = file_filter
543 547 match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
544 548 else:
545 549 match_filter = file_filter
546 550 opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
547 551
548 552 try:
549 553 diff_iter = patch.diff(
550 554 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
551 555 return BytesEnvelope(b"".join(diff_iter))
552 556 except RepoLookupError as e:
553 557 raise exceptions.LookupException(e)()
554 558
555 559 @reraise_safe_exceptions
556 560 def node_history(self, wire, revision, path, limit):
557 561 cache_on, context_uid, repo_id = self._cache_on(wire)
558 562 region = self._region(wire)
559 563
560 564 @region.conditional_cache_on_arguments(condition=cache_on)
561 565 def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
562 566 repo = self._factory.repo(wire)
563 567
564 568 ctx = self._get_ctx(repo, revision)
565 569 fctx = ctx.filectx(safe_bytes(path))
566 570
567 571 def history_iter():
568 572 limit_rev = fctx.rev()
569 573
570 574 for fctx_candidate in reversed(list(fctx.filelog())):
571 575 f_obj = fctx.filectx(fctx_candidate)
572 576
573 577 # NOTE: this can be problematic - if the only history nodes are hidden or obsolete, the resulting history is empty
574 578 _ctx = f_obj.changectx()
575 579 if _ctx.hidden() or _ctx.obsolete():
576 580 continue
577 581
578 582 if limit_rev >= f_obj.rev():
579 583 yield f_obj
580 584
581 585 history = []
582 586 for cnt, obj in enumerate(history_iter()):
583 587 if limit and cnt >= limit:
584 588 break
585 589 history.append(hex(obj.node()))
586 590
587 591 return [x for x in history]
588 592 return _node_history(context_uid, repo_id, revision, path, limit)
589 593
590 594 @reraise_safe_exceptions
591 595 def node_history_until(self, wire, revision, path, limit):
592 596 cache_on, context_uid, repo_id = self._cache_on(wire)
593 597 region = self._region(wire)
594 598
595 599 @region.conditional_cache_on_arguments(condition=cache_on)
596 600 def _node_history_until(_context_uid, _repo_id):
597 601 repo = self._factory.repo(wire)
598 602 ctx = self._get_ctx(repo, revision)
599 603 fctx = ctx.filectx(safe_bytes(path))
600 604
601 605 file_log = list(fctx.filelog())
602 606 if limit:
603 607 # Limit to the last n items
604 608 file_log = file_log[-limit:]
605 609
606 610 return [hex(fctx.filectx(cs).node()) for cs in reversed(file_log)]
607 611 return _node_history_until(context_uid, repo_id, revision, path, limit)
608 612
609 613 @reraise_safe_exceptions
610 614 def bulk_file_request(self, wire, commit_id, path, pre_load):
611 615 cache_on, context_uid, repo_id = self._cache_on(wire)
612 616 region = self._region(wire)
613 617
614 618 @region.conditional_cache_on_arguments(condition=cache_on)
615 619 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
616 620 result = {}
617 621 for attr in pre_load:
618 622 try:
619 623 method = self._bulk_file_methods[attr]
620 624 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
621 625 result[attr] = method(wire, _commit_id, _path)
622 626 except KeyError as e:
623 627 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
624 628 return result
625 629
626 630 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
627 631
628 632 @reraise_safe_exceptions
629 633 def fctx_annotate(self, wire, revision, path):
630 634 repo = self._factory.repo(wire)
631 635 ctx = self._get_ctx(repo, revision)
632 636 fctx = ctx.filectx(safe_bytes(path))
633 637
634 638 result = []
635 639 for i, annotate_obj in enumerate(fctx.annotate(), 1):
636 640 ln_no = i
637 641 sha = hex(annotate_obj.fctx.node())
638 642 content = annotate_obj.text
639 643 result.append((ln_no, ascii_str(sha), content))
640 644 return BinaryEnvelope(result)
641 645
642 646 @reraise_safe_exceptions
643 647 def fctx_node_data(self, wire, revision, path):
644 648 repo = self._factory.repo(wire)
645 649 ctx = self._get_ctx(repo, revision)
646 650 fctx = ctx.filectx(safe_bytes(path))
647 651 return BytesEnvelope(fctx.data())
648 652
649 653 @reraise_safe_exceptions
650 654 def fctx_flags(self, wire, commit_id, path):
651 655 cache_on, context_uid, repo_id = self._cache_on(wire)
652 656 region = self._region(wire)
653 657
654 658 @region.conditional_cache_on_arguments(condition=cache_on)
655 659 def _fctx_flags(_repo_id, _commit_id, _path):
656 660 repo = self._factory.repo(wire)
657 661 ctx = self._get_ctx(repo, commit_id)
658 662 fctx = ctx.filectx(safe_bytes(path))
659 663 return fctx.flags()
660 664
661 665 return _fctx_flags(repo_id, commit_id, path)
662 666
663 667 @reraise_safe_exceptions
664 668 def fctx_size(self, wire, commit_id, path):
665 669 cache_on, context_uid, repo_id = self._cache_on(wire)
666 670 region = self._region(wire)
667 671
668 672 @region.conditional_cache_on_arguments(condition=cache_on)
669 673 def _fctx_size(_repo_id, _revision, _path):
670 674 repo = self._factory.repo(wire)
671 675 ctx = self._get_ctx(repo, commit_id)
672 676 fctx = ctx.filectx(safe_bytes(path))
673 677 return fctx.size()
674 678 return _fctx_size(repo_id, commit_id, path)
675 679
676 680 @reraise_safe_exceptions
677 681 def get_all_commit_ids(self, wire, name):
678 682 cache_on, context_uid, repo_id = self._cache_on(wire)
679 683 region = self._region(wire)
680 684
681 685 @region.conditional_cache_on_arguments(condition=cache_on)
682 686 def _get_all_commit_ids(_context_uid, _repo_id, _name):
683 687 repo = self._factory.repo(wire)
684 688 revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
685 689 return revs
686 690 return _get_all_commit_ids(context_uid, repo_id, name)
687 691
688 692 @reraise_safe_exceptions
689 693 def get_config_value(self, wire, section, name, untrusted=False):
690 694 repo = self._factory.repo(wire)
691 695 return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
692 696
693 697 @reraise_safe_exceptions
694 698 def is_large_file(self, wire, commit_id, path):
695 699 cache_on, context_uid, repo_id = self._cache_on(wire)
696 700 region = self._region(wire)
697 701
698 702 @region.conditional_cache_on_arguments(condition=cache_on)
699 703 def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
700 704 return largefiles.lfutil.isstandin(safe_bytes(path))
701 705
702 706 return _is_large_file(context_uid, repo_id, commit_id, path)
703 707
704 708 @reraise_safe_exceptions
705 709 def is_binary(self, wire, revision, path):
706 710 cache_on, context_uid, repo_id = self._cache_on(wire)
707 711 region = self._region(wire)
708 712
709 713 @region.conditional_cache_on_arguments(condition=cache_on)
710 714 def _is_binary(_repo_id, _sha, _path):
711 715 repo = self._factory.repo(wire)
712 716 ctx = self._get_ctx(repo, revision)
713 717 fctx = ctx.filectx(safe_bytes(path))
714 718 return fctx.isbinary()
715 719
716 720 return _is_binary(repo_id, revision, path)
717 721
718 722 @reraise_safe_exceptions
719 723 def md5_hash(self, wire, revision, path):
720 724 cache_on, context_uid, repo_id = self._cache_on(wire)
721 725 region = self._region(wire)
722 726
723 727 @region.conditional_cache_on_arguments(condition=cache_on)
724 728 def _md5_hash(_repo_id, _sha, _path):
725 729 repo = self._factory.repo(wire)
726 730 ctx = self._get_ctx(repo, revision)
727 731 fctx = ctx.filectx(safe_bytes(path))
728 732 return hashlib.md5(fctx.data()).hexdigest()
729 733
730 734 return _md5_hash(repo_id, revision, path)
731 735
732 736 @reraise_safe_exceptions
733 737 def in_largefiles_store(self, wire, sha):
734 738 repo = self._factory.repo(wire)
735 739 return largefiles.lfutil.instore(repo, sha)
736 740
737 741 @reraise_safe_exceptions
738 742 def in_user_cache(self, wire, sha):
739 743 repo = self._factory.repo(wire)
740 744 return largefiles.lfutil.inusercache(repo.ui, sha)
741 745
742 746 @reraise_safe_exceptions
743 747 def store_path(self, wire, sha):
744 748 repo = self._factory.repo(wire)
745 749 return largefiles.lfutil.storepath(repo, sha)
746 750
747 751 @reraise_safe_exceptions
748 752 def link(self, wire, sha, path):
749 753 repo = self._factory.repo(wire)
750 754 largefiles.lfutil.link(
751 755 largefiles.lfutil.usercachepath(repo.ui, sha), path)
752 756
753 757 @reraise_safe_exceptions
754 758 def localrepository(self, wire, create=False):
755 759 self._factory.repo(wire, create=create)
756 760
757 761 @reraise_safe_exceptions
758 762 def lookup(self, wire, revision, both):
759 763 cache_on, context_uid, repo_id = self._cache_on(wire)
760 764 region = self._region(wire)
761 765
762 766 @region.conditional_cache_on_arguments(condition=cache_on)
763 767 def _lookup(_context_uid, _repo_id, _revision, _both):
764 768 repo = self._factory.repo(wire)
765 769 rev = _revision
766 770 if isinstance(rev, int):
767 771 # NOTE(marcink):
768 772 # since Mercurial doesn't support negative indexes properly
769 773 # we need to shift accordingly by one to get the proper index, e.g.
770 774 # repo[-1] => repo[-2]
771 775 # repo[0] => repo[-1]
772 776 if rev <= 0:
773 777 rev = rev + -1
774 778 try:
775 779 ctx = self._get_ctx(repo, rev)
776 780 except AmbiguousPrefixLookupError:
777 781 e = RepoLookupError(rev)
778 782 e._org_exc_tb = format_exc(sys.exc_info())
779 783 raise exceptions.LookupException(e)(rev)
780 784 except (TypeError, RepoLookupError, binascii.Error) as e:
781 785 e._org_exc_tb = format_exc(sys.exc_info())
782 786 raise exceptions.LookupException(e)(rev)
783 787 except LookupError as e:
784 788 e._org_exc_tb = format_exc(sys.exc_info())
785 789 raise exceptions.LookupException(e)(e.name)
786 790
787 791 if not both:
788 792 return ctx.hex()
789 793
790 794 ctx = repo[ctx.hex()]
791 795 return ctx.hex(), ctx.rev()
792 796
793 797 return _lookup(context_uid, repo_id, revision, both)
794 798
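# Worked sketch of the index shift in _lookup: for rev <= 0 the revision is
# shifted down by one, so Python-style indexes map onto Mercurial's revs.
def _shift(rev):
    return rev + -1 if rev <= 0 else rev

assert _shift(0) == -1   # repo[0]  is resolved as repo[-1]
assert _shift(-1) == -2  # repo[-1] is resolved as repo[-2]
assert _shift(7) == 7    # positive revisions pass through unchanged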
795 799 @reraise_safe_exceptions
796 800 def sync_push(self, wire, url):
797 801 if not self.check_url(url, wire['config']):
798 802 return
799 803
800 804 repo = self._factory.repo(wire)
801 805
802 806 # Disable any prompts for this repo
803 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
807 repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
804 808
805 809 bookmarks = list(dict(repo._bookmarks).keys())
806 810 remote = peer(repo, {}, safe_bytes(url))
807 811 # Disable any prompts for this remote
808 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
812 remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
809 813
810 814 return exchange.push(
811 815 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
812 816
813 817 @reraise_safe_exceptions
814 818 def revision(self, wire, rev):
815 819 repo = self._factory.repo(wire)
816 820 ctx = self._get_ctx(repo, rev)
817 821 return ctx.rev()
818 822
819 823 @reraise_safe_exceptions
820 824 def rev_range(self, wire, commit_filter):
821 825 cache_on, context_uid, repo_id = self._cache_on(wire)
822 826 region = self._region(wire)
823 827
824 828 @region.conditional_cache_on_arguments(condition=cache_on)
825 829 def _rev_range(_context_uid, _repo_id, _filter):
826 830 repo = self._factory.repo(wire)
827 831 revisions = [
828 832 ascii_str(repo[rev].hex())
829 833 for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
830 834 ]
831 835 return revisions
832 836
833 837 return _rev_range(context_uid, repo_id, sorted(commit_filter))
834 838
835 839 @reraise_safe_exceptions
836 840 def rev_range_hash(self, wire, node):
837 841 repo = self._factory.repo(wire)
838 842
839 843 def get_revs(repo, rev_opt):
840 844 if rev_opt:
841 845 revs = revrange(repo, rev_opt)
842 846 if len(revs) == 0:
843 847 return (nullrev, nullrev)
844 848 return max(revs), min(revs)
845 849 else:
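# no revision option given: cover the whole repo, from rev 0 up to tip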
846 850 return len(repo) - 1, 0
847 851
848 852 stop, start = get_revs(repo, [node + ':'])
849 853 revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
850 854 return revs
851 855
852 856 @reraise_safe_exceptions
853 857 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
854 858 org_path = safe_bytes(wire["path"])
855 859 other_path = safe_bytes(kwargs.pop('other_path', ''))
856 860
857 861 # case when we want to compare two independent repositories
858 862 if other_path and other_path != wire["path"]:
859 863 baseui = self._factory._create_config(wire["config"])
860 864 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
861 865 else:
862 866 repo = self._factory.repo(wire)
863 867 return list(repo.revs(rev_spec, *args))
864 868
865 869 @reraise_safe_exceptions
866 870 def verify(self, wire):
867 871 repo = self._factory.repo(wire)
868 872 baseui = self._factory._create_config(wire['config'])
869 873
870 874 baseui, output = patch_ui_message_output(baseui)
871 875
872 876 repo.ui = baseui
873 877 verify.verify(repo)
874 878 return output.getvalue()
875 879
876 880 @reraise_safe_exceptions
877 881 def hg_update_cache(self, wire):
878 882 repo = self._factory.repo(wire)
879 883 baseui = self._factory._create_config(wire['config'])
880 884 baseui, output = patch_ui_message_output(baseui)
881 885
882 886 repo.ui = baseui
883 887 with repo.wlock(), repo.lock():
884 888 repo.updatecaches(full=True)
885 889
886 890 return output.getvalue()
887 891
888 892 @reraise_safe_exceptions
889 893 def hg_rebuild_fn_cache(self, wire):
890 894 repo = self._factory.repo(wire)
891 895 baseui = self._factory._create_config(wire['config'])
892 896 baseui, output = patch_ui_message_output(baseui)
893 897
894 898 repo.ui = baseui
895 899
896 900 repair.rebuildfncache(baseui, repo)
897 901
898 902 return output.getvalue()
899 903
900 904 @reraise_safe_exceptions
901 905 def tags(self, wire):
902 906 cache_on, context_uid, repo_id = self._cache_on(wire)
903 907 region = self._region(wire)
904 908
905 909 @region.conditional_cache_on_arguments(condition=cache_on)
906 910 def _tags(_context_uid, _repo_id):
907 911 repo = self._factory.repo(wire)
908 912 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
909 913
910 914 return _tags(context_uid, repo_id)
911 915
912 916 @reraise_safe_exceptions
913 917 def update(self, wire, node='', clean=False):
914 918 repo = self._factory.repo(wire)
915 919 baseui = self._factory._create_config(wire['config'])
916 920 node = safe_bytes(node)
917 921
918 922 commands.update(baseui, repo, node=node, clean=clean)
919 923
920 924 @reraise_safe_exceptions
921 925 def identify(self, wire):
922 926 repo = self._factory.repo(wire)
923 927 baseui = self._factory._create_config(wire['config'])
924 928 output = io.BytesIO()
925 929 baseui.write = output.write
926 930 # This is required to get a full node id
927 931 baseui.debugflag = True
928 932 commands.identify(baseui, repo, id=True)
929 933
930 934 return output.getvalue()
931 935
932 936 @reraise_safe_exceptions
933 937 def heads(self, wire, branch=None):
934 938 repo = self._factory.repo(wire)
935 939 baseui = self._factory._create_config(wire['config'])
936 940 output = io.BytesIO()
937 941
938 942 def write(data, **unused_kwargs):
939 943 output.write(data)
940 944
941 945 baseui.write = write
942 946 if branch:
943 947 args = [safe_bytes(branch)]
944 948 else:
945 949 args = []
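# the b'{node} ' template makes hg print space-separated full node ids,
# e.g. b'<40-hex-node> <40-hex-node> '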
946 950 commands.heads(baseui, repo, template=b'{node} ', *args)
947 951
948 952 return output.getvalue()
949 953
950 954 @reraise_safe_exceptions
951 955 def ancestor(self, wire, revision1, revision2):
952 956 repo = self._factory.repo(wire)
953 957 changelog = repo.changelog
954 958 lookup = repo.lookup
955 959 a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
956 960 return hex(a)
957 961
958 962 @reraise_safe_exceptions
959 963 def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
960 964 baseui = self._factory._create_config(wire["config"], hooks=hooks)
961 965 clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
962 966
963 967 @reraise_safe_exceptions
964 968 def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
965 969
966 970 repo = self._factory.repo(wire)
967 971 baseui = self._factory._create_config(wire['config'])
968 972 publishing = baseui.configbool(b'phases', b'publish')
969 973
970 974 def _filectxfn(_repo, ctx, path: bytes):
971 975 """
972 976 Marks given path as added/changed/removed in a given _repo. This is
973 977 for internal mercurial commit function.
974 978 """
975 979
976 980 # check if this path is removed
977 981 if safe_str(path) in removed:
978 982 # returning None is a way to mark node for removal
979 983 return None
980 984
981 985 # check if this path is added
982 986 for node in updated:
983 987 if safe_bytes(node['path']) == path:
984 988 return memfilectx(
985 989 _repo,
986 990 changectx=ctx,
987 991 path=safe_bytes(node['path']),
988 992 data=safe_bytes(node['content']),
989 993 islink=False,
990 994 isexec=bool(node['mode'] & stat.S_IXUSR),
991 995 copysource=False)
992 996 abort_exc = exceptions.AbortException()
993 997 raise abort_exc(f"Given path hasn't been marked as added, changed or removed ({path})")
994 998
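# honour the repo's phases.publish setting: publishing repos get public commits, non-publishing ones get drafts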
995 999 if publishing:
996 1000 new_commit_phase = b'public'
997 1001 else:
998 1002 new_commit_phase = b'draft'
999 1003 with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
1000 1004 kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
1001 1005 commit_ctx = memctx(
1002 1006 repo=repo,
1003 1007 parents=parents,
1004 1008 text=safe_bytes(message),
1005 1009 files=[safe_bytes(x) for x in files],
1006 1010 filectxfn=_filectxfn,
1007 1011 user=safe_bytes(user),
1008 1012 date=(commit_time, commit_timezone),
1009 1013 extra=kwargs)
1010 1014
1011 1015 n = repo.commitctx(commit_ctx)
1012 1016 new_id = hex(n)
1013 1017
1014 1018 return new_id
1015 1019
1016 1020 @reraise_safe_exceptions
1017 1021 def pull(self, wire, url, commit_ids=None):
1018 1022 repo = self._factory.repo(wire)
1019 1023 # Disable any prompts for this repo
1020 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
1024 repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
1021 1025
1022 1026 remote = peer(repo, {}, safe_bytes(url))
1023 1027 # Disable any prompts for this remote
1024 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
1028 remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
1025 1029
1026 1030 if commit_ids:
1027 1031 commit_ids = [bin(commit_id) for commit_id in commit_ids]
1028 1032
1029 1033 return exchange.pull(
1030 1034 repo, remote, heads=commit_ids, force=None).cgresult
1031 1035
1032 1036 @reraise_safe_exceptions
1033 1037 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
1034 1038 repo = self._factory.repo(wire)
1035 1039 baseui = self._factory._create_config(wire['config'], hooks=hooks)
1036 1040
1037 1041 source = safe_bytes(source)
1038 1042
1039 1043 # Mercurial internally has a lot of logic that checks ONLY whether
1040 1044 # an option is defined, so we pass options only when they are defined
1041 opts = {}
1045 opts = {"remote_hidden": False}
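# remote_hidden=False matches Mercurial's --remote-hidden pull option, so hidden changesets are never requested from the remote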
1042 1046
1043 1047 if bookmark:
1044 1048 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
1045 1049 if isinstance(bookmark, list) else safe_bytes(bookmark)
1046 1050
1047 1051 if branch:
1048 1052 opts['branch'] = [safe_bytes(x) for x in branch] \
1049 1053 if isinstance(branch, list) else safe_bytes(branch)
1050 1054
1051 1055 if revision:
1052 1056 opts['rev'] = [safe_bytes(x) for x in revision] \
1053 1057 if isinstance(revision, list) else safe_bytes(revision)
1054 1058
1055 1059 commands.pull(baseui, repo, source, **opts)
1056 1060
1057 1061 @reraise_safe_exceptions
1058 1062 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
1059 1063 repo = self._factory.repo(wire)
1060 1064 baseui = self._factory._create_config(wire['config'], hooks=hooks)
1061 1065
1062 1066 revisions = [safe_bytes(x) for x in revisions] \
1063 1067 if isinstance(revisions, list) else safe_bytes(revisions)
1064 1068
1065 1069 commands.push(baseui, repo, safe_bytes(dest_path),
1066 1070 rev=revisions,
1067 1071 new_branch=push_branches)
1068 1072
1069 1073 @reraise_safe_exceptions
1070 1074 def strip(self, wire, revision, update, backup):
1071 1075 repo = self._factory.repo(wire)
1072 1076 ctx = self._get_ctx(repo, revision)
1073 1077 hgext_strip.strip(
1074 1078 repo.baseui, repo, ctx.node(), update=update, backup=backup)
1075 1079
1076 1080 @reraise_safe_exceptions
1077 1081 def get_unresolved_files(self, wire):
1078 1082 repo = self._factory.repo(wire)
1079 1083
1080 1084 log.debug('Calculating unresolved files for repo: %s', repo)
1081 1085 output = io.BytesIO()
1082 1086
1083 1087 def write(data, **unused_kwargs):
1084 1088 output.write(data)
1085 1089
1086 1090 baseui = self._factory._create_config(wire['config'])
1087 1091 baseui.write = write
1088 1092
1089 1093 commands.resolve(baseui, repo, list=True)
1090 1094 unresolved = output.getvalue().splitlines(False)
1091 1095 return unresolved
1092 1096
1093 1097 @reraise_safe_exceptions
1094 1098 def merge(self, wire, revision):
1095 1099 repo = self._factory.repo(wire)
1096 1100 baseui = self._factory._create_config(wire['config'])
1097 1101 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1098 1102
1099 1103 # If sub repositories are used, Mercurial prompts the user in
1100 1104 # case of merge conflicts or different sub repository sources. By
1101 1105 # setting the interactive flag to `False`, Mercurial doesn't prompt the
1102 1106 # user but instead uses a default value.
1103 repo.ui.setconfig(b'ui', b'interactive', False)
1107 repo.ui.setconfig(b'ui', b'interactive', b'false')
1104 1108 commands.merge(baseui, repo, rev=safe_bytes(revision))
1105 1109
1106 1110 @reraise_safe_exceptions
1107 1111 def merge_state(self, wire):
1108 1112 repo = self._factory.repo(wire)
1109 1113 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1110 1114
1111 1115 # If sub repositories are used, Mercurial prompts the user in
1112 1116 # case of merge conflicts or different sub repository sources. By
1113 1117 # setting the interactive flag to `False`, Mercurial doesn't prompt the
1114 1118 # user but instead uses a default value.
1115 repo.ui.setconfig(b'ui', b'interactive', False)
1119 repo.ui.setconfig(b'ui', b'interactive', b'false')
1116 1120 ms = hg_merge.mergestate(repo)
1117 1121 return list(ms.unresolved())
1118 1122
1119 1123 @reraise_safe_exceptions
1120 1124 def commit(self, wire, message, username, close_branch=False):
1121 1125 repo = self._factory.repo(wire)
1122 1126 baseui = self._factory._create_config(wire['config'])
1123 1127 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1124 1128 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1125 1129
1126 1130 @reraise_safe_exceptions
1127 1131 def rebase(self, wire, source='', dest='', abort=False):
1128 1132
1129 1133 repo = self._factory.repo(wire)
1130 1134 baseui = self._factory._create_config(wire['config'])
1131 1135 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1132 1136 # If sub repositories are used, Mercurial prompts the user in
1133 1137 # case of merge conflicts or different sub repository sources. By
1134 1138 # setting the interactive flag to `False`, Mercurial doesn't prompt the
1135 1139 # user but instead uses a default value.
1136 repo.ui.setconfig(b'ui', b'interactive', False)
1140 repo.ui.setconfig(b'ui', b'interactive', b'false')
1137 1141
1138 1142 rebase_kws = dict(
1139 1143 keep=not abort,
1140 1144 abort=abort
1141 1145 )
1142 1146
1143 1147 if source:
1144 1148 source = repo[source]
1145 1149 rebase_kws['base'] = [source.hex()]
1146 1150 if dest:
1147 1151 dest = repo[dest]
1148 1152 rebase_kws['dest'] = dest.hex()
1149 1153
1150 1154 rebase.rebase(baseui, repo, **rebase_kws)
1151 1155
1152 1156 @reraise_safe_exceptions
1153 1157 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
1154 1158 repo = self._factory.repo(wire)
1155 1159 ctx = self._get_ctx(repo, revision)
1156 1160 node = ctx.node()
1157 1161
1158 1162 date = (tag_time, tag_timezone)
1159 1163 try:
1160 1164 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
1161 1165 except Abort as e:
1162 1166 log.exception("Tag operation aborted")
1163 1167 # The exception can contain unicode, which we convert via repr
1164 1168 raise exceptions.AbortException(e)(repr(e))
1165 1169
1166 1170 @reraise_safe_exceptions
1167 1171 def bookmark(self, wire, bookmark, revision=''):
1168 1172 repo = self._factory.repo(wire)
1169 1173 baseui = self._factory._create_config(wire['config'])
1170 1174 revision = revision or ''
1171 1175 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1172 1176
1173 1177 @reraise_safe_exceptions
1174 1178 def install_hooks(self, wire, force=False):
1175 1179 # we don't need any special hooks for Mercurial
1176 1180 pass
1177 1181
1178 1182 @reraise_safe_exceptions
1179 1183 def get_hooks_info(self, wire):
1180 1184 return {
1181 1185 'pre_version': vcsserver.get_version(),
1182 1186 'post_version': vcsserver.get_version(),
1183 1187 }
1184 1188
1185 1189 @reraise_safe_exceptions
1186 1190 def set_head_ref(self, wire, head_name):
1187 1191 pass
1188 1192
1189 1193 @reraise_safe_exceptions
1190 1194 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1191 1195 archive_dir_name, commit_id, cache_config):
1192 1196
1193 1197 def file_walker(_commit_id, path):
1194 1198 repo = self._factory.repo(wire)
1195 1199 ctx = repo[_commit_id]
1196 1200 is_root = path in ['', '/']
1197 1201 if is_root:
1198 1202 matcher = alwaysmatcher(badfn=None)
1199 1203 else:
1200 1204 matcher = patternmatcher('', [(b'glob', safe_bytes(path)+b'/**', b'')], badfn=None)
1201 1205 file_iter = ctx.manifest().walk(matcher)
1202 1206
1203 1207 for fn in file_iter:
1204 1208 file_path = fn
1205 1209 flags = ctx.flags(fn)
1206 1210 mode = 0o755 if b'x' in flags else 0o644
1207 1211 is_link = b'l' in flags
1208 1212
1209 1213 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1210 1214
1211 1215 return store_archive_in_cache(
1212 1216 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1213 1217
@@ -1,954 +1,959 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import os
20 20 import subprocess
21 21 from urllib.error import URLError
22 22 import urllib.parse
23 23 import logging
24 24 import posixpath as vcspath
25 25 import io
26 26 import urllib.request
27 27 import urllib.parse
28 28 import urllib.error
29 29 import traceback
30 30
31
32 31 import svn.client # noqa
33 32 import svn.core # noqa
34 33 import svn.delta # noqa
35 34 import svn.diff # noqa
36 35 import svn.fs # noqa
37 36 import svn.repos # noqa
38 37
39 38 import rhodecode
40 39 from vcsserver import svn_diff, exceptions, subprocessio, settings
41 40 from vcsserver.base import (
42 41 RepoFactory,
43 42 raise_from_original,
44 43 ArchiveNode,
45 44 store_archive_in_cache,
46 45 BytesEnvelope,
47 46 BinaryEnvelope,
48 47 )
49 48 from vcsserver.exceptions import NoContentException
50 from vcsserver.str_utils import safe_str, safe_bytes
51 from vcsserver.type_utils import assert_bytes
52 49 from vcsserver.vcs_base import RemoteBase
50 from vcsserver.lib.str_utils import safe_str, safe_bytes
51 from vcsserver.lib.type_utils import assert_bytes
53 52 from vcsserver.lib.svnremoterepo import svnremoterepo
53 from vcsserver.lib.svn_txn_utils import store_txn_id_data
54 54
55 55 log = logging.getLogger(__name__)
56 56
57 57
58 58 svn_compatible_versions_map = {
59 59 'pre-1.4-compatible': '1.3',
60 60 'pre-1.5-compatible': '1.4',
61 61 'pre-1.6-compatible': '1.5',
62 62 'pre-1.8-compatible': '1.7',
63 63 'pre-1.9-compatible': '1.8',
64 64 }
65 65
66 66 current_compatible_version = '1.14'
67 67
68 68
69 69 def reraise_safe_exceptions(func):
70 70 """Decorator for converting svn exceptions to something neutral."""
71 71 def wrapper(*args, **kwargs):
72 72 try:
73 73 return func(*args, **kwargs)
74 74 except Exception as e:
75 75 if not hasattr(e, '_vcs_kind'):
76 76 log.exception("Unhandled exception in svn remote call")
77 77 raise_from_original(exceptions.UnhandledException(e), e)
78 78 raise
79 79 return wrapper
80 80
81 81
82 82 class SubversionFactory(RepoFactory):
83 83 repo_type = 'svn'
84 84
85 85 def _create_repo(self, wire, create, compatible_version):
86 86 path = svn.core.svn_path_canonicalize(wire['path'])
87 87 if create:
88 88 fs_config = {'compatible-version': current_compatible_version}
89 89 if compatible_version:
90 90
91 91 compatible_version_string = \
92 92 svn_compatible_versions_map.get(compatible_version) \
93 93 or compatible_version
94 94 fs_config['compatible-version'] = compatible_version_string
95 95
96 96 log.debug('Create SVN repo with config `%s`', fs_config)
97 97 repo = svn.repos.create(path, "", "", None, fs_config)
98 98 else:
99 99 repo = svn.repos.open(path)
100 100
101 101 log.debug('repository created: got SVN object: %s', repo)
102 102 return repo
103 103
104 104 def repo(self, wire, create=False, compatible_version=None):
105 105 """
106 106 Get a repository instance for the given path.
107 107 """
108 108 return self._create_repo(wire, create, compatible_version)
109 109
110 110
111 111 NODE_TYPE_MAPPING = {
112 112 svn.core.svn_node_file: 'file',
113 113 svn.core.svn_node_dir: 'dir',
114 114 }
115 115
116 116
117 117 class SvnRemote(RemoteBase):
118 118
119 119 def __init__(self, factory, hg_factory=None):
120 120 self._factory = factory
121 121
122 122 self._bulk_methods = {
123 123 # NOT supported in SVN ATM...
124 124 }
125 125 self._bulk_file_methods = {
126 126 "size": self.get_file_size,
127 127 "data": self.get_file_content,
128 128 "flags": self.get_node_type,
129 129 "is_binary": self.is_binary,
130 130 "md5": self.md5_hash
131 131 }
132 132
133 133 @reraise_safe_exceptions
134 134 def bulk_file_request(self, wire, commit_id, path, pre_load):
135 135 cache_on, context_uid, repo_id = self._cache_on(wire)
136 136 region = self._region(wire)
137 137
138 138 # since we use a unified API, we need to cast from str to int for SVN
139 139 commit_id = int(commit_id)
140 140
141 141 @region.conditional_cache_on_arguments(condition=cache_on)
142 142 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
143 143 result = {}
144 144 for attr in pre_load:
145 145 try:
146 146 method = self._bulk_file_methods[attr]
147 147 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
148 148 result[attr] = method(wire, _commit_id, _path)
149 149 except KeyError as e:
150 150 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
151 151 return result
152 152
153 153 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
154 154
155 155 @reraise_safe_exceptions
156 156 def discover_svn_version(self):
157 157 try:
158 158 import svn.core
159 159 svn_ver = svn.core.SVN_VERSION
160 160 except ImportError:
161 161 svn_ver = None
162 162 return safe_str(svn_ver)
163 163
164 164 @reraise_safe_exceptions
165 165 def is_empty(self, wire):
166 166 try:
167 167 return self.lookup(wire, -1) == 0
168 168 except Exception:
169 169 log.exception("failed to read object_store")
170 170 return False
171 171
172 172 def check_url(self, url, config):
173 173
174 174 # the uuid function returns a valid UUID only for a proper repo,
175 175 # otherwise it raises an exception
176 176 username, password, src_url = self.get_url_and_credentials(url)
177 177 try:
178 178 svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
179 179 except Exception:
180 180 tb = traceback.format_exc()
181 181 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
182 182 raise URLError(f'"{url}" is not a valid Subversion source url.')
183 183 return True
184 184
185 185 def is_path_valid_repository(self, wire, path):
186 186 # NOTE(marcink): short circuit the check for SVN repo;
187 187 # repos.open might be expensive, but we have one cheap
188 188 # pre-condition we can use: checking for the 'format' file
189 189 if not os.path.isfile(os.path.join(path, 'format')):
190 190 return False
191 191
192 192 cache_on, context_uid, repo_id = self._cache_on(wire)
193 193 region = self._region(wire)
194 194
195 195 @region.conditional_cache_on_arguments(condition=cache_on)
196 196 def _assert_correct_path(_context_uid, _repo_id, fast_check):
197 197
198 198 try:
199 199 svn.repos.open(path)
200 200 except svn.core.SubversionException:
201 201 tb = traceback.format_exc()
202 202 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
203 203 return False
204 204 return True
205 205
206 206 return _assert_correct_path(context_uid, repo_id, True)
207 207
208 208 @reraise_safe_exceptions
209 209 def verify(self, wire):
210 210 repo_path = wire['path']
211 211 if not self.is_path_valid_repository(wire, repo_path):
212 212 raise Exception(
213 213 f"Path {repo_path} is not a valid Subversion repository.")
214 214
215 215 cmd = ['svnadmin', 'info', repo_path]
216 216 stdout, stderr = subprocessio.run_command(cmd)
217 217 return stdout
218 218
219 219 @reraise_safe_exceptions
220 220 def lookup(self, wire, revision):
221 221 if revision not in [-1, None, 'HEAD']:
222 222 raise NotImplementedError
223 223 repo = self._factory.repo(wire)
224 224 fs_ptr = svn.repos.fs(repo)
225 225 head = svn.fs.youngest_rev(fs_ptr)
226 226 return head
227 227
228 228 @reraise_safe_exceptions
229 229 def lookup_interval(self, wire, start_ts, end_ts):
230 230 repo = self._factory.repo(wire)
231 231 fsobj = svn.repos.fs(repo)
232 232 start_rev = None
233 233 end_rev = None
234 234 if start_ts:
235 235 start_ts_svn = apr_time_t(start_ts)
236 236 start_rev = svn.repos.dated_revision(repo, start_ts_svn) + 1
237 237 else:
238 238 start_rev = 1
239 239 if end_ts:
240 240 end_ts_svn = apr_time_t(end_ts)
241 241 end_rev = svn.repos.dated_revision(repo, end_ts_svn)
242 242 else:
243 243 end_rev = svn.fs.youngest_rev(fsobj)
244 244 return start_rev, end_rev
245 245
246 246 @reraise_safe_exceptions
247 247 def revision_properties(self, wire, revision):
248 248
249 249 cache_on, context_uid, repo_id = self._cache_on(wire)
250 250 region = self._region(wire)
251 251
252 252 @region.conditional_cache_on_arguments(condition=cache_on)
253 253 def _revision_properties(_repo_id, _revision):
254 254 repo = self._factory.repo(wire)
255 255 fs_ptr = svn.repos.fs(repo)
256 256 return svn.fs.revision_proplist(fs_ptr, revision)
257 257 return _revision_properties(repo_id, revision)
258 258
259 259 def revision_changes(self, wire, revision):
260 260
261 261 repo = self._factory.repo(wire)
262 262 fsobj = svn.repos.fs(repo)
263 263 rev_root = svn.fs.revision_root(fsobj, revision)
264 264
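# replay the revision through a ChangeCollector editor to record per-path change actions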
265 265 editor = svn.repos.ChangeCollector(fsobj, rev_root)
266 266 editor_ptr, editor_baton = svn.delta.make_editor(editor)
267 267 base_dir = ""
268 268 send_deltas = False
269 269 svn.repos.replay2(
270 270 rev_root, base_dir, svn.core.SVN_INVALID_REVNUM, send_deltas,
271 271 editor_ptr, editor_baton, None)
272 272
273 273 added = []
274 274 changed = []
275 275 removed = []
276 276
277 277 # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
278 278 for path, change in editor.changes.items():
279 279 # TODO: Decide what to do with directory nodes. Subversion can add
280 280 # empty directories.
281 281
282 282 if change.item_kind == svn.core.svn_node_dir:
283 283 continue
284 284 if change.action in [svn.repos.CHANGE_ACTION_ADD]:
285 285 added.append(path)
286 286 elif change.action in [svn.repos.CHANGE_ACTION_MODIFY,
287 287 svn.repos.CHANGE_ACTION_REPLACE]:
288 288 changed.append(path)
289 289 elif change.action in [svn.repos.CHANGE_ACTION_DELETE]:
290 290 removed.append(path)
291 291 else:
292 292 raise NotImplementedError(
293 293 "Action {} not supported on path {}".format(
294 294 change.action, path))
295 295
296 296 changes = {
297 297 'added': added,
298 298 'changed': changed,
299 299 'removed': removed,
300 300 }
301 301 return changes
302 302
303 303 @reraise_safe_exceptions
304 304 def node_history(self, wire, path, revision, limit):
305 305 cache_on, context_uid, repo_id = self._cache_on(wire)
306 306 region = self._region(wire)
307 307
308 308 @region.conditional_cache_on_arguments(condition=cache_on)
309 309 def _node_history(_context_uid, _repo_id, _path, _revision, _limit):
310 310 cross_copies = False
311 311 repo = self._factory.repo(wire)
312 312 fsobj = svn.repos.fs(repo)
313 313 rev_root = svn.fs.revision_root(fsobj, revision)
314 314
315 315 history_revisions = []
316 316 history = svn.fs.node_history(rev_root, path)
317 317 history = svn.fs.history_prev(history, cross_copies)
318 318 while history:
319 319 __, node_revision = svn.fs.history_location(history)
320 320 history_revisions.append(node_revision)
321 321 if limit and len(history_revisions) >= limit:
322 322 break
323 323 history = svn.fs.history_prev(history, cross_copies)
324 324 return history_revisions
325 325 return _node_history(context_uid, repo_id, path, revision, limit)
326 326
327 327 @reraise_safe_exceptions
328 328 def node_properties(self, wire, path, revision):
329 329 cache_on, context_uid, repo_id = self._cache_on(wire)
330 330 region = self._region(wire)
331 331
332 332 @region.conditional_cache_on_arguments(condition=cache_on)
333 333 def _node_properties(_repo_id, _path, _revision):
334 334 repo = self._factory.repo(wire)
335 335 fsobj = svn.repos.fs(repo)
336 336 rev_root = svn.fs.revision_root(fsobj, revision)
337 337 return svn.fs.node_proplist(rev_root, path)
338 338 return _node_properties(repo_id, path, revision)
339 339
340 340 def file_annotate(self, wire, path, revision):
341 341 abs_path = 'file://' + urllib.request.pathname2url(
342 342 vcspath.join(wire['path'], path))
343 343 file_uri = svn.core.svn_path_canonicalize(abs_path)
344 344
345 345 start_rev = svn_opt_revision_value_t(0)
346 346 peg_rev = svn_opt_revision_value_t(revision)
347 347 end_rev = peg_rev
348 348
349 349 annotations = []
350 350
351 351 def receiver(line_no, revision, author, date, line, pool):
352 352 annotations.append((line_no, revision, line))
353 353
354 354 # TODO: Cannot use blame5, missing typemap function in the swig code
355 355 try:
356 356 svn.client.blame2(
357 357 file_uri, peg_rev, start_rev, end_rev,
358 358 receiver, svn.client.create_context())
359 359 except svn.core.SubversionException as exc:
360 360 log.exception("Error during blame operation.")
361 361 raise Exception(
362 362 f"Blame not supported or file does not exist at path {path}. "
363 363 f"Error {exc}.")
364 364
365 365 return BinaryEnvelope(annotations)
366 366
367 367 @reraise_safe_exceptions
368 368 def get_node_type(self, wire, revision=None, path=''):
369 369
370 370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 371 region = self._region(wire)
372 372
373 373 @region.conditional_cache_on_arguments(condition=cache_on)
374 374 def _get_node_type(_repo_id, _revision, _path):
375 375 repo = self._factory.repo(wire)
376 376 fs_ptr = svn.repos.fs(repo)
377 377 if _revision is None:
378 378 _revision = svn.fs.youngest_rev(fs_ptr)
379 379 root = svn.fs.revision_root(fs_ptr, _revision)
380 380 node = svn.fs.check_path(root, path)
381 381 return NODE_TYPE_MAPPING.get(node, None)
382 382 return _get_node_type(repo_id, revision, path)
383 383
384 384 @reraise_safe_exceptions
385 385 def get_nodes(self, wire, revision=None, path=''):
386 386
387 387 cache_on, context_uid, repo_id = self._cache_on(wire)
388 388 region = self._region(wire)
389 389
390 390 @region.conditional_cache_on_arguments(condition=cache_on)
391 391 def _get_nodes(_repo_id, _path, _revision):
392 392 repo = self._factory.repo(wire)
393 393 fsobj = svn.repos.fs(repo)
394 394 if _revision is None:
395 395 _revision = svn.fs.youngest_rev(fsobj)
396 396 root = svn.fs.revision_root(fsobj, _revision)
397 397 entries = svn.fs.dir_entries(root, path)
398 398 result = []
399 399 for entry_path, entry_info in entries.items():
400 400 result.append(
401 401 (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
402 402 return result
403 403 return _get_nodes(repo_id, path, revision)
404 404
405 405 @reraise_safe_exceptions
406 406 def get_file_content(self, wire, rev=None, path=''):
407 407 repo = self._factory.repo(wire)
408 408 fsobj = svn.repos.fs(repo)
409 409
410 410 if rev is None:
411 411 rev = svn.fs.youngest_rev(fsobj)
412 412
413 413 root = svn.fs.revision_root(fsobj, rev)
414 414 content = svn.core.Stream(svn.fs.file_contents(root, path))
415 415 return BytesEnvelope(content.read())
416 416
417 417 @reraise_safe_exceptions
418 418 def get_file_size(self, wire, revision=None, path=''):
419 419
420 420 cache_on, context_uid, repo_id = self._cache_on(wire)
421 421 region = self._region(wire)
422 422
423 423 @region.conditional_cache_on_arguments(condition=cache_on)
424 424 def _get_file_size(_repo_id, _revision, _path):
425 425 repo = self._factory.repo(wire)
426 426 fsobj = svn.repos.fs(repo)
427 427 if _revision is None:
428 428 _revision = svn.fs.youngest_rev(fsobj)
429 429 root = svn.fs.revision_root(fsobj, _revision)
430 430 size = svn.fs.file_length(root, path)
431 431 return size
432 432 return _get_file_size(repo_id, revision, path)
433 433
434 434 def create_repository(self, wire, compatible_version=None):
435 435 log.info('Creating Subversion repository in path "%s"', wire['path'])
436 436 self._factory.repo(wire, create=True,
437 437 compatible_version=compatible_version)
438 438
439 439 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
440 440 obj = urllib.parse.urlparse(src_url)
441 441 username = obj.username or ''
442 442 password = obj.password or ''
443 443 return username, password, src_url
444 444
445 445 def import_remote_repository(self, wire, src_url):
446 446 repo_path = wire['path']
447 447 if not self.is_path_valid_repository(wire, repo_path):
448 448 raise Exception(
449 449 f"Path {repo_path} is not a valid Subversion repository.")
450 450
451 451 username, password, src_url = self.get_url_and_credentials(src_url)
452 452 rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
453 453 '--trust-server-cert-failures=unknown-ca']
454 454 if username and password:
455 455 rdump_cmd += ['--username', username, '--password', password]
456 456 rdump_cmd += [src_url]
457 457
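# stream the svnrdump output straight into svnadmin load through a pipe; no intermediate dump file is written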
458 458 rdump = subprocess.Popen(
459 459 rdump_cmd,
460 460 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
461 461 load = subprocess.Popen(
462 462 ['svnadmin', 'load', repo_path], stdin=rdump.stdout)
463 463
464 464 # TODO: johbo: This can be a very long operation, might be better
465 465 # to track some kind of status and provide an api to check if the
466 466 # import is done.
467 467 rdump.wait()
468 468 load.wait()
469 469
470 470 log.debug('Return process ended with code: %s', rdump.returncode)
471 471 if rdump.returncode != 0:
472 472 errors = rdump.stderr.read()
473 473 log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
474 474
475 475 reason = 'UNKNOWN'
476 476 if b'svnrdump: E230001:' in errors:
477 477 reason = 'INVALID_CERTIFICATE'
478 478
479 479 if reason == 'UNKNOWN':
480 480 reason = f'UNKNOWN:{safe_str(errors)}'
481 481
482 482 raise Exception(
483 483 'Failed to dump the remote repository from {}. Reason:{}'.format(
484 484 src_url, reason))
485 485 if load.returncode != 0:
486 486 raise Exception(
487 487 f'Failed to load the dump of remote repository from {src_url}.')
488 488
489 489 def commit(self, wire, message, author, timestamp, updated, removed):
490 490
491 491 message = safe_bytes(message)
492 492 author = safe_bytes(author)
493 493
494 494 repo = self._factory.repo(wire)
495 495 fsobj = svn.repos.fs(repo)
496 496
497 497 rev = svn.fs.youngest_rev(fsobj)
498 498 txn = svn.repos.fs_begin_txn_for_commit(repo, rev, author, message)
499 499 txn_root = svn.fs.txn_root(txn)
500 500
501 501 for node in updated:
502 502 TxnNodeProcessor(node, txn_root).update()
503 503 for node in removed:
504 504 TxnNodeProcessor(node, txn_root).remove()
505 505
506 svn_txn_id = safe_str(svn.fs.svn_fs_txn_name(txn))
507 full_repo_path = wire['path']
508 txn_id_data = {'svn_txn_id': svn_txn_id, 'rc_internal_commit': True}
509
510 store_txn_id_data(full_repo_path, svn_txn_id, txn_id_data)
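# the stored txn id lets later hooks recognise this transaction as an internal RhodeCode commit (see the rc_internal_commit flag)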
506 511 commit_id = svn.repos.fs_commit_txn(repo, txn)
507 512
508 513 if timestamp:
509 514 apr_time = apr_time_t(timestamp)
510 515 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
511 516 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
512 517
513 518 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
514 519 return commit_id
515 520
516 521 @reraise_safe_exceptions
517 522 def diff(self, wire, rev1, rev2, path1=None, path2=None,
518 523 ignore_whitespace=False, context=3):
519 524
520 525 wire.update(cache=False)
521 526 repo = self._factory.repo(wire)
522 527 diff_creator = SvnDiffer(
523 528 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
524 529 try:
525 530 return BytesEnvelope(diff_creator.generate_diff())
526 531 except svn.core.SubversionException as e:
527 532 log.exception(
528 533 "Error during diff operation operation. "
529 534 "Path might not exist %s, %s", path1, path2)
530 535 return BytesEnvelope(b'')
531 536
532 537 @reraise_safe_exceptions
533 538 def is_large_file(self, wire, path):
534 539 return False
535 540
536 541 @reraise_safe_exceptions
537 542 def is_binary(self, wire, rev, path):
538 543 cache_on, context_uid, repo_id = self._cache_on(wire)
539 544 region = self._region(wire)
540 545
541 546 @region.conditional_cache_on_arguments(condition=cache_on)
542 547 def _is_binary(_repo_id, _rev, _path):
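# heuristic: any NUL byte in the file content marks it as binary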
543 548 raw_bytes = self.get_file_content(wire, rev, path)
544 549 if not raw_bytes:
545 550 return False
546 551 return b'\0' in raw_bytes
547 552
548 553 return _is_binary(repo_id, rev, path)
549 554
550 555 @reraise_safe_exceptions
551 556 def md5_hash(self, wire, rev, path):
552 557 cache_on, context_uid, repo_id = self._cache_on(wire)
553 558 region = self._region(wire)
554 559
555 560 @region.conditional_cache_on_arguments(condition=cache_on)
556 561 def _md5_hash(_repo_id, _rev, _path):
557 562 return ''
558 563
559 564 return _md5_hash(repo_id, rev, path)
560 565
561 566 @reraise_safe_exceptions
562 567 def run_svn_command(self, wire, cmd, **opts):
563 568 path = wire.get('path', None)
564 569 debug_mode = rhodecode.ConfigGet().get_bool('debug')
565 570
566 571 if path and os.path.isdir(path):
567 572 opts['cwd'] = path
568 573
569 574 safe_call = opts.pop('_safe', False)
570 575
571 576 svnenv = os.environ.copy()
572 577 svnenv.update(opts.pop('extra_env', {}))
573 578
574 579 _opts = {'env': svnenv, 'shell': False}
575 580
576 581 try:
577 582 _opts.update(opts)
578 583 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
579 584
580 585 return b''.join(proc), b''.join(proc.stderr)
581 586 except OSError as err:
582 587 if safe_call:
583 588 return '', safe_str(err).strip()
584 589 else:
585 590 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
586 591 call_opts = {}
587 592 if debug_mode:
588 593 call_opts = _opts
589 594
590 595 tb_err = ("Couldn't run svn command ({}).\n"
591 596 "Original error was:{}\n"
592 597 "Call options:{}\n"
593 598 .format(cmd, err, call_opts))
594 599 log.exception(tb_err)
595 600 raise exceptions.VcsException()(tb_err)
596 601
597 602 @reraise_safe_exceptions
598 603 def install_hooks(self, wire, force=False):
599 604 from vcsserver.hook_utils import install_svn_hooks
600 605 repo_path = wire['path']
601 606 binary_dir = settings.BINARY_DIR
602 607 executable = None
603 608 if binary_dir:
604 609 executable = os.path.join(binary_dir, 'python3')
605 610 return install_svn_hooks(repo_path, force_create=force)
606 611
607 612 @reraise_safe_exceptions
608 613 def get_hooks_info(self, wire):
609 614 from vcsserver.hook_utils import (
610 615 get_svn_pre_hook_version, get_svn_post_hook_version)
611 616 repo_path = wire['path']
612 617 return {
613 618 'pre_version': get_svn_pre_hook_version(repo_path),
614 619 'post_version': get_svn_post_hook_version(repo_path),
615 620 }
616 621
617 622 @reraise_safe_exceptions
618 623 def set_head_ref(self, wire, head_name):
619 624 pass
620 625
621 626 @reraise_safe_exceptions
622 627 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
623 628 archive_dir_name, commit_id, cache_config):
624 629
625 630 def walk_tree(root, root_dir, _commit_id):
626 631 """
627 632 Special recursive svn repo walker
628 633 """
629 634 root_dir = safe_bytes(root_dir)
630 635
631 636 filemode_default = 0o100644
632 637 filemode_executable = 0o100755
633 638
634 639 file_iter = svn.fs.dir_entries(root, root_dir)
635 640 for f_name in file_iter:
636 641 f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
637 642
638 643 if f_type == 'dir':
639 644 # return only DIR, and then all entries in that dir
640 645 yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
641 646 new_root = os.path.join(root_dir, f_name)
642 647 yield from walk_tree(root, new_root, _commit_id)
643 648 else:
644 649
645 650 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
646 651 prop_list = svn.fs.node_proplist(root, f_path)
647 652
648 653 f_mode = filemode_default
649 654 if prop_list.get('svn:executable'):
650 655 f_mode = filemode_executable
651 656
652 657 f_is_link = False
653 658 if prop_list.get('svn:special'):
654 659 f_is_link = True
655 660
656 661 data = {
657 662 'is_link': f_is_link,
658 663 'mode': f_mode,
659 664 'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
660 665 }
661 666
662 667 yield f_path, data, f_type
663 668
664 669 def file_walker(_commit_id, path):
665 670 repo = self._factory.repo(wire)
666 671 root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))
667 672
668 673 def no_content():
669 674 raise NoContentException()
670 675
671 676 for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
672 677 file_path = f_name
673 678
674 679 if f_type == 'dir':
675 680 mode = f_data['mode']
676 681 yield ArchiveNode(file_path, mode, False, no_content)
677 682 else:
678 683 mode = f_data['mode']
679 684 is_link = f_data['is_link']
680 685 data_stream = f_data['content_stream']
681 686 yield ArchiveNode(file_path, mode, is_link, data_stream)
682 687
683 688 return store_archive_in_cache(
684 689 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
685 690
686 691
687 692 class SvnDiffer:
688 693 """
689 694 Utility to create diffs based on difflib and the Subversion api
690 695 """
691 696
692 697 binary_content = False
693 698
694 699 def __init__(
695 700 self, repo, src_rev, src_path, tgt_rev, tgt_path,
696 701 ignore_whitespace, context):
697 702 self.repo = repo
698 703 self.ignore_whitespace = ignore_whitespace
699 704 self.context = context
700 705
701 706 fsobj = svn.repos.fs(repo)
702 707
703 708 self.tgt_rev = tgt_rev
704 709 self.tgt_path = tgt_path or ''
705 710 self.tgt_root = svn.fs.revision_root(fsobj, tgt_rev)
706 711 self.tgt_kind = svn.fs.check_path(self.tgt_root, self.tgt_path)
707 712
708 713 self.src_rev = src_rev
709 714 self.src_path = src_path or self.tgt_path
710 715 self.src_root = svn.fs.revision_root(fsobj, src_rev)
711 716 self.src_kind = svn.fs.check_path(self.src_root, self.src_path)
712 717
713 718 self._validate()
714 719
715 720 def _validate(self):
716 721 if (self.tgt_kind != svn.core.svn_node_none and
717 722 self.src_kind != svn.core.svn_node_none and
718 723 self.src_kind != self.tgt_kind):
719 724 # TODO: johbo: proper error handling
720 725 raise Exception(
721 726 "Source and target are not compatible for diff generation. "
722 727 "Source type: %s, target type: %s" %
723 728 (self.src_kind, self.tgt_kind))
724 729
725 730 def generate_diff(self) -> bytes:
726 731 buf = io.BytesIO()
727 732 if self.tgt_kind == svn.core.svn_node_dir:
728 733 self._generate_dir_diff(buf)
729 734 else:
730 735 self._generate_file_diff(buf)
731 736 return buf.getvalue()
732 737
733 738 def _generate_dir_diff(self, buf: io.BytesIO):
734 739 editor = DiffChangeEditor()
735 740 editor_ptr, editor_baton = svn.delta.make_editor(editor)
736 741 svn.repos.dir_delta2(
737 742 self.src_root,
738 743 self.src_path,
739 744 '', # src_entry
740 745 self.tgt_root,
741 746 self.tgt_path,
742 747 editor_ptr, editor_baton,
743 748 authorization_callback_allow_all,
744 749 False, # text_deltas
745 750 svn.core.svn_depth_infinity, # depth
746 751 False, # entry_props
747 752 False, # ignore_ancestry
748 753 )
749 754
750 755 for path, __, change in sorted(editor.changes):
751 756 self._generate_node_diff(
752 757 buf, change, path, self.tgt_path, path, self.src_path)
753 758
754 759 def _generate_file_diff(self, buf: io.BytesIO):
755 760 change = None
756 761 if self.src_kind == svn.core.svn_node_none:
757 762 change = "add"
758 763 elif self.tgt_kind == svn.core.svn_node_none:
759 764 change = "delete"
760 765 tgt_base, tgt_path = vcspath.split(self.tgt_path)
761 766 src_base, src_path = vcspath.split(self.src_path)
762 767 self._generate_node_diff(
763 768 buf, change, tgt_path, tgt_base, src_path, src_base)
764 769
765 770 def _generate_node_diff(
766 771 self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
767 772
768 773 tgt_path_bytes = safe_bytes(tgt_path)
769 774 tgt_path = safe_str(tgt_path)
770 775
771 776 src_path_bytes = safe_bytes(src_path)
772 777 src_path = safe_str(src_path)
773 778
774 779 if self.src_rev == self.tgt_rev and tgt_base == src_base:
775 780 # for consistent behaviour with git/hg, return an empty diff if
776 781 # we compare the same revisions
777 782 return
778 783
779 784 tgt_full_path = vcspath.join(tgt_base, tgt_path)
780 785 src_full_path = vcspath.join(src_base, src_path)
781 786
782 787 self.binary_content = False
783 788 mime_type = self._get_mime_type(tgt_full_path)
784 789
785 790 if mime_type and not mime_type.startswith(b'text'):
786 791 self.binary_content = True
787 792 buf.write(b"=" * 67 + b'\n')
788 793 buf.write(b"Cannot display: file marked as a binary type.\n")
789 794 buf.write(b"svn:mime-type = %s\n" % mime_type)
790 795 buf.write(b"Index: %b\n" % tgt_path_bytes)
791 796 buf.write(b"=" * 67 + b'\n')
792 797 buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
793 798
794 799 if change == 'add':
795 800 # TODO: johbo: SVN is missing a zero here compared to git
796 801 buf.write(b"new file mode 10644\n")
797 802
798 803 # TODO(marcink): intro to binary detection of svn patches
799 804 # if self.binary_content:
800 805 # buf.write(b'GIT binary patch\n')
801 806
802 807 buf.write(b"--- /dev/null\t(revision 0)\n")
803 808 src_lines = []
804 809 else:
805 810 if change == 'delete':
806 811 buf.write(b"deleted file mode 10644\n")
807 812
808 813 # TODO(marcink): intro to binary detection of svn patches
809 814 # if self.binary_content:
810 815 # buf.write('GIT binary patch\n')
811 816
812 817 buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
813 818 src_lines = self._svn_readlines(self.src_root, src_full_path)
814 819
815 820 if change == 'delete':
816 821 buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
817 822 tgt_lines = []
818 823 else:
819 824 buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
820 825 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
821 826
822 827 # we made our diff header, time to generate the diff content into our buffer
823 828
824 829 if not self.binary_content:
825 830 udiff = svn_diff.unified_diff(
826 831 src_lines, tgt_lines, context=self.context,
827 832 ignore_blank_lines=self.ignore_whitespace,
828 833 ignore_case=False,
829 834 ignore_space_changes=self.ignore_whitespace)
830 835
831 836 buf.writelines(udiff)
832 837
833 838 def _get_mime_type(self, path) -> bytes:
834 839 try:
835 840 mime_type = svn.fs.node_prop(
836 841 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
837 842 except svn.core.SubversionException:
838 843 mime_type = svn.fs.node_prop(
839 844 self.src_root, path, svn.core.SVN_PROP_MIME_TYPE)
840 845 return mime_type
841 846
842 847 def _svn_readlines(self, fs_root, node_path):
843 848 if self.binary_content:
844 849 return []
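# only regular files and symlinks carry line content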
845 850 node_kind = svn.fs.check_path(fs_root, node_path)
846 851 if node_kind not in (
847 852 svn.core.svn_node_file, svn.core.svn_node_symlink):
848 853 return []
849 854 content = svn.core.Stream(
850 855 svn.fs.file_contents(fs_root, node_path)).read()
851 856
852 857 return content.splitlines(True)
853 858
854 859
855 860 class DiffChangeEditor(svn.delta.Editor):
856 861 """
857 862 Records changes between two given revisions
858 863 """
859 864
860 865 def __init__(self):
861 866 self.changes = []
862 867
863 868 def delete_entry(self, path, revision, parent_baton, pool=None):
864 869 self.changes.append((path, None, 'delete'))
865 870
866 871 def add_file(
867 872 self, path, parent_baton, copyfrom_path, copyfrom_revision,
868 873 file_pool=None):
869 874 self.changes.append((path, 'file', 'add'))
870 875
871 876 def open_file(self, path, parent_baton, base_revision, file_pool=None):
872 877 self.changes.append((path, 'file', 'change'))
873 878
874 879
875 880 def authorization_callback_allow_all(root, path, pool):
876 881 return True
877 882
878 883
879 884 class TxnNodeProcessor:
880 885 """
881 886 Utility to process the change of one node within a transaction root.
882 887
883 888 It encapsulates the knowledge of how to add, update or remove
884 889 a node for a given transaction root. The purpose is to support the method
885 890 `SvnRemote.commit`.
886 891 """
887 892
888 893 def __init__(self, node, txn_root):
889 894 assert_bytes(node['path'])
890 895
891 896 self.node = node
892 897 self.txn_root = txn_root
893 898
894 899 def update(self):
895 900 self._ensure_parent_dirs()
896 901 self._add_file_if_node_does_not_exist()
897 902 self._update_file_content()
898 903 self._update_file_properties()
899 904
900 905 def remove(self):
901 906 svn.fs.delete(self.txn_root, self.node['path'])
902 907 # TODO: Clean up directory if empty
903 908
904 909 def _ensure_parent_dirs(self):
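# collect missing ancestor directories bottom-up, then create them top-down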
905 910 curdir = vcspath.dirname(self.node['path'])
906 911 dirs_to_create = []
907 912 while not self._svn_path_exists(curdir):
908 913 dirs_to_create.append(curdir)
909 914 curdir = vcspath.dirname(curdir)
910 915
911 916 for curdir in reversed(dirs_to_create):
912 917 log.debug('Creating missing directory "%s"', curdir)
913 918 svn.fs.make_dir(self.txn_root, curdir)
914 919
915 920 def _svn_path_exists(self, path):
916 921 path_status = svn.fs.check_path(self.txn_root, path)
917 922 return path_status != svn.core.svn_node_none
918 923
919 924 def _add_file_if_node_does_not_exist(self):
920 925 kind = svn.fs.check_path(self.txn_root, self.node['path'])
921 926 if kind == svn.core.svn_node_none:
922 927 svn.fs.make_file(self.txn_root, self.node['path'])
923 928
924 929 def _update_file_content(self):
925 930 assert_bytes(self.node['content'])
926 931
927 932 handler, baton = svn.fs.apply_textdelta(
928 933 self.txn_root, self.node['path'], None, None)
929 934 svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
930 935
931 936 def _update_file_properties(self):
932 937 properties = self.node.get('properties', {})
933 938 for key, value in properties.items():
934 939 svn.fs.change_node_prop(
935 940 self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
936 941
937 942
938 943 def apr_time_t(timestamp):
939 944 """
940 945 Convert a Python timestamp into APR timestamp type apr_time_t
941 946 """
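# apr_time_t counts microseconds since the epoch, hence the 1e6 scaling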
942 947 return int(timestamp * 1E6)
943 948
944 949
945 950 def svn_opt_revision_value_t(num):
946 951 """
947 952 Put `num` into a `svn_opt_revision_value_t` structure.
948 953 """
949 954 value = svn.core.svn_opt_revision_value_t()
950 955 value.number = num
951 956 revision = svn.core.svn_opt_revision_t()
952 957 revision.kind = svn.core.svn_opt_revision_number
953 958 revision.value = value
954 959 return revision
@@ -1,255 +1,258 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import logging
20 20 import itertools
21 21
22 22 import mercurial
23 23 import mercurial.error
24 24 import mercurial.wireprotoserver
25 25 import mercurial.hgweb.common
26 26 import mercurial.hgweb.hgweb_mod
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30 from vcsserver.str_utils import ascii_bytes, safe_bytes
30 from vcsserver.lib.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 # propagated from mercurial documentation
36 36 HG_UI_SECTIONS = [
37 37 'alias', 'auth', 'decode/encode', 'defaults', 'diff', 'email', 'extensions',
38 38 'format', 'merge-patterns', 'merge-tools', 'hooks', 'http_proxy', 'smtp',
39 39 'patch', 'paths', 'profiling', 'server', 'trusted', 'ui', 'web',
40 40 ]
41 41
42 42
43 43 class HgWeb(mercurial.hgweb.hgweb_mod.hgweb):
44 44 """Extension of hgweb that simplifies some functions."""
45 45
46 46 def _get_view(self, repo):
47 47 """Views are not supported."""
48 48 return repo
49 49
50 50 def loadsubweb(self):
51 51 """The result is only used in the templater method which is not used."""
52 52 return None
53 53
54 54 def run(self):
55 55 """Unused function so raise an exception if accidentally called."""
56 56 raise NotImplementedError
57 57
58 58 def templater(self, req):
59 59 """Function used in an unreachable code path.
60 60
61 61 This code is unreachable because we guarantee that the HTTP request
62 62 corresponds to a Mercurial command. See the is_hg method. So we are
63 63 never going to get a user-visible url.
64 64 """
65 65 raise NotImplementedError
66 66
67 67 def archivelist(self, nodeid):
68 68 """Unused function so raise an exception if accidentally called."""
69 69 raise NotImplementedError
70 70
71 71 def __call__(self, environ, start_response):
72 72 """Run the WSGI application.
73 73
74 74 This may be called by multiple threads.
75 75 """
76 76 from mercurial.hgweb import request as requestmod
77 77 req = requestmod.parserequestfromenv(environ)
78 78 res = requestmod.wsgiresponse(req, start_response)
79 79 gen = self.run_wsgi(req, res)
80 80
81 81 first_chunk = None
82 82
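# eagerly pull the first chunk so errors from run_wsgi surface here; it is re-chained with the remaining generator below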
83 83 try:
84 84 data = next(gen)
85 85
86 86 def first_chunk():
87 87 yield data
88 88 except StopIteration:
89 89 pass
90 90
91 91 if first_chunk:
92 92 return itertools.chain(first_chunk(), gen)
93 93 return gen
94 94
95 95 def _runwsgi(self, req, res, repo):
96 96
97 97 cmd = req.qsparams.get(b'cmd', '')
98 98 if not mercurial.wireprotoserver.iscmd(cmd):
99 99 # NOTE(marcink): for unsupported commands, we return bad request
100 100 # internally from HG
101 101 log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
102 102 from mercurial.hgweb.common import statusmessage
103 103 res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
104 104 res.setbodybytes(b'')
105 105 return res.sendresponse()
106 106
107 107 return super()._runwsgi(req, res, repo)
108 108
109 109
110 110 def sanitize_hg_ui(baseui):
111 111 # NOTE(marcink): since python3, hgsubversion is deprecated.
112 112 # Old installations might still have it enabled, so we
113 113 # explicitly remove it here to make sure it won't propagate further
114 114
115 115 if baseui.config(b'extensions', b'hgsubversion') is not None:
116 116 for cfg in (baseui._ocfg, baseui._tcfg, baseui._ucfg):
117 117 if b'extensions' in cfg:
118 118 if b'hgsubversion' in cfg[b'extensions']:
119 119 del cfg[b'extensions'][b'hgsubversion']
120 120
121 121
122 122 def make_hg_ui_from_config(repo_config):
123 123 baseui = mercurial.ui.ui()
124 124
125 125 # clean the baseui object
126 126 baseui._ocfg = mercurial.config.config()
127 127 baseui._ucfg = mercurial.config.config()
128 128 baseui._tcfg = mercurial.config.config()
129 129
130 130 for section, option, value in repo_config:
131 131 baseui.setconfig(
132 132 ascii_bytes(section, allow_bytes=True),
133 133 ascii_bytes(option, allow_bytes=True),
134 134 ascii_bytes(value, allow_bytes=True))
135 135
136 136 # make our hgweb quiet so it doesn't print output
137 137 baseui.setconfig(b'ui', b'quiet', b'true')
138 138
139 # use POST requests with args instead of GET with headers - fixes issues with big repos with lots of branches
140 baseui.setconfig(b'experimental', b'httppostargs', b'true')
141
139 142 return baseui
140 143
141 144
142 145 def update_hg_ui_from_hgrc(baseui, repo_path):
143 146 path = os.path.join(repo_path, '.hg', 'hgrc')
144 147
145 148 if not os.path.isfile(path):
146 149 log.debug('hgrc file is not present at %s, skipping...', path)
147 150 return
148 151 log.debug('reading hgrc from %s', path)
149 152 cfg = mercurial.config.config()
150 153 cfg.read(ascii_bytes(path))
151 154 for section in HG_UI_SECTIONS:
152 155 for k, v in cfg.items(section):
153 156 log.debug('setting ui from file: [%s] %s=%s', section, k, v)
154 157 baseui.setconfig(
155 158 ascii_bytes(section, allow_bytes=True),
156 159 ascii_bytes(k, allow_bytes=True),
157 160 ascii_bytes(v, allow_bytes=True))
158 161
159 162
160 163 def create_hg_wsgi_app(repo_path, repo_name, config):
161 164 """
162 165 Prepares a WSGI application to handle Mercurial requests.
163 166
164 167 :param config: is a list of 3-item tuples representing a ConfigObject
165 168 (it is the serialized version of the config object).
166 169 """
167 170 log.debug("Creating Mercurial WSGI application")
168 171
169 172 baseui = make_hg_ui_from_config(config)
170 173 update_hg_ui_from_hgrc(baseui, repo_path)
171 174 sanitize_hg_ui(baseui)
172 175
173 176 try:
174 177 return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
175 178 except mercurial.error.RequirementError as e:
176 179 raise exceptions.RequirementException(e)(e)
177 180
178 181
179 182 class GitHandler:
180 183 """
181 184 Handler for Git operations such as push/pull.
182 185 """
183 186 def __init__(self, repo_location, repo_name, git_path, update_server_info,
184 187 extras):
185 188 if not os.path.isdir(repo_location):
186 189 raise OSError(repo_location)
187 190 self.content_path = repo_location
188 191 self.repo_name = repo_name
189 192 self.repo_location = repo_location
190 193 self.extras = extras
191 194 self.git_path = git_path
192 195 self.update_server_info = update_server_info
193 196
194 197 def __call__(self, environ, start_response):
195 198 app = webob.exc.HTTPNotFound()
196 199 candidate_paths = (
197 200 self.content_path, os.path.join(self.content_path, '.git'))
198 201
199 202 for content_path in candidate_paths:
200 203 try:
201 204 app = pygrack.GitRepository(
202 205 self.repo_name, content_path, self.git_path,
203 206 self.update_server_info, self.extras)
204 207 break
205 208 except OSError:
206 209 continue
207 210
208 211 return app(environ, start_response)
209 212
210 213
211 214 def create_git_wsgi_app(repo_path, repo_name, config):
212 215 """
213 216 Creates a WSGI application to handle Git requests.
214 217
215 218 :param config: is a dictionary holding the extras.
216 219 """
217 220 git_path = settings.GIT_EXECUTABLE()
218 221 update_server_info = config.pop('git_update_server_info')
219 222 app = GitHandler(
220 223 repo_path, repo_name, git_path, update_server_info, config)
221 224
222 225 return app
223 226
224 227
225 228 class GitLFSHandler:
226 229 """
227 230 Handler for Git LFS operations
228 231 """
229 232
230 233 def __init__(self, repo_location, repo_name, git_path, update_server_info,
231 234 extras):
232 235 if not os.path.isdir(repo_location):
233 236 raise OSError(repo_location)
234 237 self.content_path = repo_location
235 238 self.repo_name = repo_name
236 239 self.repo_location = repo_location
237 240 self.extras = extras
238 241 self.git_path = git_path
239 242 self.update_server_info = update_server_info
240 243
241 244 def get_app(self, git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
242 245 app = git_lfs.create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
243 246 return app
244 247
245 248
246 249 def create_git_lfs_wsgi_app(repo_path, repo_name, config):
247 250 git_path = settings.GIT_EXECUTABLE()
248 251 update_server_info = config.pop('git_update_server_info')
249 252 git_lfs_enabled = config.pop('git_lfs_enabled')
250 253 git_lfs_store_path = config.pop('git_lfs_store_path')
251 254 git_lfs_http_scheme = config.pop('git_lfs_http_scheme', 'http')
252 255 app = GitLFSHandler(
253 256 repo_path, repo_name, git_path, update_server_info, config)
254 257
255 258 return app.get_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
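
A sketch of the extras dict this factory expects; the keys match the pops above, while the paths and values are illustrative assumptions:

    config = {
        'git_update_server_info': False,           # popped, passed to GitLFSHandler
        'git_lfs_enabled': True,                   # popped, enables the LFS app
        'git_lfs_store_path': '/var/opt/lfs_store',
        'git_lfs_http_scheme': 'https',            # optional, defaults to 'http'
    }
    app = create_git_lfs_wsgi_app('/srv/repos/myrepo', 'myrepo', config)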
@@ -1,563 +1,563 b''
1 1 """
2 2 Module provides a class that wraps communication over subprocess.Popen
3 3 input, output, and error streams into a meaningful, non-blocking, concurrent
4 4 stream processor exposing the output data as an iterator fit to be a
5 5 return value passed by a WSGI application to a WSGI server per PEP 3333.
6 6
7 7 Copyright (c) 2011 Daniel Dotsenko <dotsa[at]hotmail.com>
8 8
9 9 This file is part of git_http_backend.py Project.
10 10
11 11 git_http_backend.py Project is free software: you can redistribute it and/or
12 12 modify it under the terms of the GNU Lesser General Public License as
13 13 published by the Free Software Foundation, either version 2.1 of the License,
14 14 or (at your option) any later version.
15 15
16 16 git_http_backend.py Project is distributed in the hope that it will be useful,
17 17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 19 GNU Lesser General Public License for more details.
20 20
21 21 You should have received a copy of the GNU Lesser General Public License
22 22 along with git_http_backend.py Project.
23 23 If not, see <http://www.gnu.org/licenses/>.
24 24 """
25 25 import os
26 26 import collections
27 27 import logging
28 28 import subprocess
29 29 import threading
30 30
31 from vcsserver.str_utils import safe_str
31 from vcsserver.lib.str_utils import safe_str
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class StreamFeeder(threading.Thread):
37 37 """
38 38 Writing into a pipe-like object normally blocks once the buffer is full.
39 39 This thread feeds data from a file-like source into a pipe
40 40 without blocking the main thread.
41 41 We close the input pipe once the end of the source stream is reached.
42 42 """
43 43
44 44 def __init__(self, source):
45 45 super().__init__()
46 46 self.daemon = True
47 47 filelike = False
48 48 self.bytes = b''
49 49 if type(source) in (str, bytes, bytearray): # string-like
50 50 self.bytes = bytes(source)
51 51 else: # can be either file pointer or file-like
52 52 if isinstance(source, int): # file descriptor it is
53 53 # converting file descriptor (int) stdin into file-like
54 54 source = os.fdopen(source, 'rb', 16384)
55 55 # let's see if source is file-like by now
56 56 filelike = hasattr(source, 'read')
57 57 if not filelike and not self.bytes:
58 58 raise TypeError("StreamFeeder's source object must be a readable "
59 59 "file-like, a file descriptor, or a string-like.")
60 60 self.source = source
61 61 self.readiface, self.writeiface = os.pipe()
62 62
63 63 def run(self):
64 64 writer = self.writeiface
65 65 try:
66 66 if self.bytes:
67 67 os.write(writer, self.bytes)
68 68 else:
69 69 s = self.source
70 70
71 71 while 1:
72 72 _bytes = s.read(4096)
73 73 if not _bytes:
74 74 break
75 75 os.write(writer, _bytes)
76 76
77 77 finally:
78 78 os.close(writer)
79 79
80 80 @property
81 81 def output(self):
82 82 return self.readiface
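
A minimal usage sketch, assuming a POSIX system with `cat` on PATH: feed an in-memory payload to a subprocess through the pipe exposed by `output`:

    import subprocess

    feeder = StreamFeeder(b'hello\n')
    feeder.start()
    proc = subprocess.Popen(['cat'], stdin=feeder.output, stdout=subprocess.PIPE)
    out, _ = proc.communicate()
    assert out == b'hello\n'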
83 83
84 84
85 85 class InputStreamChunker(threading.Thread):
86 86 def __init__(self, source, target, buffer_size, chunk_size):
87 87
88 88 super().__init__()
89 89
90 90 self.daemon = True # die die die.
91 91
92 92 self.source = source
93 93 self.target = target
94 94 self.chunk_count_max = int(buffer_size / chunk_size) + 1
95 95 self.chunk_size = chunk_size
96 96
97 97 self.data_added = threading.Event()
98 98 self.data_added.clear()
99 99
100 100 self.keep_reading = threading.Event()
101 101 self.keep_reading.set()
102 102
103 103 self.EOF = threading.Event()
104 104 self.EOF.clear()
105 105
106 106 self.go = threading.Event()
107 107 self.go.set()
108 108
109 109 def stop(self):
110 110 self.go.clear()
111 111 self.EOF.set()
112 112 try:
113 113 # this is not proper, but is done to force the reader thread to let
114 114 # go of the input because, if successful, .close() will send EOF
115 115 # down the pipe.
116 116 self.source.close()
117 117 except Exception:
118 118 pass
119 119
120 120 def run(self):
121 121 s = self.source
122 122 t = self.target
123 123 cs = self.chunk_size
124 124 chunk_count_max = self.chunk_count_max
125 125 keep_reading = self.keep_reading
126 126 da = self.data_added
127 127 go = self.go
128 128
129 129 try:
130 130 b = s.read(cs)
131 131 except ValueError:
132 132 b = ''
133 133
134 134 timeout_input = 20
135 135 while b and go.is_set():
136 136 if len(t) > chunk_count_max:
137 137 keep_reading.clear()
138 138 keep_reading.wait(timeout_input)
139 139 if len(t) > chunk_count_max + timeout_input:
140 140 log.error("Timed out while waiting for input from subprocess.")
141 141 os._exit(-1) # this will cause the worker to recycle itself
142 142
143 143 t.append(b)
144 144 da.set()
145 145
146 146 try:
147 147 b = s.read(cs)
148 148 except ValueError: # probably "I/O operation on closed file"
149 149 b = ''
150 150
151 151 self.EOF.set()
152 152 da.set() # for cases when done but there was no input.
153 153
154 154
155 155 class BufferedGenerator:
156 156 """
157 157 Class behaves as a non-blocking, buffered pipe reader.
158 158 It reads chunks of data (through a thread)
159 159 from a blocking pipe and appends them to a deque of chunks.
160 160 Reading is halted in the thread when the maximum number of chunks is buffered.
161 161 The .next() may operate in blocking or non-blocking fashion by yielding
162 162 '' if no data is ready
163 163 to be sent, or by not returning until there is some data to send.
164 164 When we get EOF from the underlying source pipe, we raise a marker so that
165 165 StopIteration is raised after the last chunk of data is yielded.
166 166 """
167 167
168 168 def __init__(self, name, source, buffer_size=65536, chunk_size=4096,
169 169 starting_values=None, bottomless=False):
170 170 starting_values = starting_values or []
171 171 self.name = name
172 172 self.buffer_size = buffer_size
173 173 self.chunk_size = chunk_size
174 174
175 175 if bottomless:
176 176 maxlen = int(buffer_size / chunk_size)
177 177 else:
178 178 maxlen = None
179 179
180 180 self.data_queue = collections.deque(starting_values, maxlen)
181 181 self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size)
182 182 if starting_values:
183 183 self.worker.data_added.set()
184 184 self.worker.start()
185 185
186 186 ####################
187 187 # Generator's methods
188 188 ####################
189 189 def __str__(self):
190 190 return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})'
191 191
192 192 def __iter__(self):
193 193 return self
194 194
195 195 def __next__(self):
196 196
197 197 while not self.length and not self.worker.EOF.is_set():
198 198 self.worker.data_added.clear()
199 199 self.worker.data_added.wait(0.2)
200 200
201 201 if self.length:
202 202 self.worker.keep_reading.set()
203 203 return bytes(self.data_queue.popleft())
204 204 elif self.worker.EOF.is_set():
205 205 raise StopIteration
206 206
207 207 def throw(self, exc_type, value=None, traceback=None):
208 208 if not self.worker.EOF.is_set():
209 209 raise exc_type(value)
210 210
211 211 def start(self):
212 212 self.worker.start()
213 213
214 214 def stop(self):
215 215 self.worker.stop()
216 216
217 217 def close(self):
218 218 try:
219 219 self.worker.stop()
220 220 self.throw(GeneratorExit)
221 221 except (GeneratorExit, StopIteration):
222 222 pass
223 223
224 224 ####################
225 225 # Threaded reader's infrastructure.
226 226 ####################
227 227 @property
228 228 def input(self):
229 229 return self.worker.source # the pipe-like source the worker thread reads from
230 230
231 231 @property
232 232 def data_added_event(self):
233 233 return self.worker.data_added
234 234
235 235 @property
236 236 def data_added(self):
237 237 return self.worker.data_added.is_set()
238 238
239 239 @property
240 240 def reading_paused(self):
241 241 return not self.worker.keep_reading.is_set()
242 242
243 243 @property
244 244 def done_reading_event(self):
245 245 """
246 246 Done_reading does not mean that the iterator's buffer is empty.
247 247 Iterator might have done reading from underlying source, but the read
248 248 chunks might still be available for serving through .next() method.
249 249
250 250 :returns: An Event class instance.
251 251 """
252 252 return self.worker.EOF
253 253
254 254 @property
255 255 def done_reading(self):
256 256 """
257 257 Done_reading does not mean that the iterator's buffer is empty.
258 258 Iterator might have done reading from underlying source, but the read
259 259 chunks might still be available for serving through .next() method.
260 260
261 261 :returns: A bool value.
262 262 """
263 263 return self.worker.EOF.is_set()
264 264
265 265 @property
266 266 def length(self):
267 267 """
268 268 returns int.
269 269
270 270 This is the length of the queue of chunks, not the length of
271 271 the combined contents in those chunks.
272 272
273 273 __len__() cannot be meaningfully implemented because this
274 274 reader just streams through bottomless content and
275 275 can only know the length of what it has already seen.
276 276
277 277 Per PEP 3333, if __len__() returns a value, the WSGI server
278 278 sets the response's length to that. In order not to
279 279 confuse WSGI PEP 3333 servers, we will not implement __len__
280 280 at all.
281 281 """
282 282 return len(self.data_queue)
283 283
284 284 def prepend(self, x):
285 285 self.data_queue.appendleft(x)
286 286
287 287 def append(self, x):
288 288 self.data_queue.append(x)
289 289
290 290 def extend(self, o):
291 291 self.data_queue.extend(o)
292 292
293 293 def __getitem__(self, i):
294 294 return self.data_queue[i]
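
A minimal sketch of reading a pipe through BufferedGenerator; closing the write end delivers EOF so the worker thread finishes:

    import os

    read_fd, write_fd = os.pipe()
    os.write(write_fd, b'chunk-1')
    os.close(write_fd)  # EOF for the reader thread
    buffered = BufferedGenerator('example', os.fdopen(read_fd, 'rb'))
    assert b''.join(buffered) == b'chunk-1'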
295 295
296 296
297 297 class SubprocessIOChunker:
298 298 """
299 299 Processor class wrapping handling of subprocess IO.
300 300
301 301 .. important::
302 302
303 303 Watch out for the method `__del__` on this class. If this object
304 304 is deleted, it will kill the subprocess, so avoid returning
305 305 the `output` attribute or using it as in the following
306 306 example::
307 307
308 308 # `args` expected to run a program that produces a lot of output
309 309 output = ''.join(SubprocessIOChunker(
310 310 args, shell=False, inputstream=inputstream, env=environ).output)
311 311
312 312 # `output` will not contain all the data, because the __del__ method
313 313 # has already killed the subprocess in this case before all output
314 314 # has been consumed.
315 315
316 316
317 317
318 318 In a way, this is a "communicate()" replacement with a twist.
319 319
320 320 - We are multithreaded. Writing in and reading out/err each happen in separate threads.
321 321 - We support concurrent (in and out) stream processing.
322 322 - The output is not a stream. It's a queue of read (bytes, not str)
323 323 chunks. The object behaves as an iterable: you can "for chunk in obj:" it.
324 324 - We are non-blocking in more respects than communicate()
325 325 (reading from subprocess out pauses when the internal buffer is full, but
326 326 does not block the parent calling code. On the flip side, reading from a
327 327 slow-yielding subprocess may block the iteration until data shows up. This
328 328 does not block the inpipe reading occurring in a parallel thread.)
329 329
330 330 The purpose of the object is to allow us to wrap subprocess interactions into
331 331 an iterable that can be passed to a WSGI server as the application's return
332 332 value. Because of stream-processing-ability, WSGI does not have to read ALL
333 333 of the subprocess's output and buffer it, before handing it to WSGI server for
334 334 HTTP response. Instead, the class initializer reads just a bit of the stream
335 335 to figure out if an error occurred or is likely to occur, and if not, hands
336 336 further iteration over the subprocess output to the server for completion of
337 337 the HTTP response.
338 338
339 339 A real or perceived subprocess error is trapped and raised as one of the
340 340 OSError family of exceptions.
341 341
342 342 Example usage:
343 343 # try:
344 344 # answer = SubprocessIOChunker(
345 345 # cmd,
346 346 # input,
347 347 # buffer_size = 65536,
348 348 # chunk_size = 4096
349 349 # )
350 350 # except OSError as e:
351 351 # print(str(e))
352 352 # raise e
353 353 #
354 354 # return answer
355 355
356 356
357 357 """
358 358
359 359 # TODO: johbo: This is used to make sure that the open end of the PIPE
360 360 # is closed in the end. It would be way better to wrap this into an
361 361 # object, so that it is closed automatically once it is consumed or
362 362 # something similar.
363 363 _close_input_fd = None
364 364
365 365 _closed = False
366 366 _stdout = None
367 367 _stderr = None
368 368
369 369 def __init__(self, cmd, input_stream=None, buffer_size=65536,
370 370 chunk_size=4096, starting_values=None, fail_on_stderr=True,
371 371 fail_on_return_code=True, **kwargs):
372 372 """
373 373 Initializes SubprocessIOChunker
374 374
375 375 :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
376 376 :param input_stream: (Default: None) A file-like, string, or file pointer.
377 377 :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
378 378 :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
379 379 :param starting_values: (Default: []) An array of strings to put in front of the output queue.
380 380 :param fail_on_stderr: (Default: True) Whether to raise an exception in
381 381 case something is written to stderr.
382 382 :param fail_on_return_code: (Default: True) Whether to raise an
383 383 exception if the return code is not 0.
384 384 """
385 385
386 386 kwargs['shell'] = kwargs.get('shell', True)
387 387
388 388 starting_values = starting_values or []
389 389 if input_stream:
390 390 input_streamer = StreamFeeder(input_stream)
391 391 input_streamer.start()
392 392 input_stream = input_streamer.output
393 393 self._close_input_fd = input_stream
394 394
395 395 self._fail_on_stderr = fail_on_stderr
396 396 self._fail_on_return_code = fail_on_return_code
397 397 self.cmd = cmd
398 398
399 399 _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
400 400 **kwargs)
401 401 self.process = _p
402 402
403 403 bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values)
404 404 bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True)
405 405
406 406 while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
407 407 # doing this until we reach either end of file, or end of buffer.
408 408 bg_out.data_added_event.wait(0.2)
409 409 bg_out.data_added_event.clear()
410 410
411 411 # at this point it's still ambiguous whether we are done reading or the buffer is just full.
412 412 # Either way, if there is an error (a non-zero code returned by the ended process,
413 413 # or implied by the presence of output on stderr), we error out.
414 414 # Else, we are happy.
415 415 return_code = _p.poll()
416 416 ret_code_ok = return_code in [None, 0]
417 417 ret_code_fail = return_code is not None and return_code != 0
418 418 if (
419 419 (ret_code_fail and fail_on_return_code) or
420 420 (ret_code_ok and fail_on_stderr and bg_err.length)
421 421 ):
422 422
423 423 try:
424 424 _p.terminate()
425 425 except Exception:
426 426 pass
427 427
428 428 bg_out.stop()
429 429 out = b''.join(bg_out)
430 430 self._stdout = out
431 431
432 432 bg_err.stop()
433 433 err = b''.join(bg_err)
434 434 self._stderr = err
435 435
436 436 # code from https://github.com/schacon/grack/pull/7
437 437 if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '):
438 438 bg_out = iter([out])
439 439 _p = None
440 440 elif err and fail_on_stderr:
441 441 text_err = err.decode()
442 442 raise OSError(
443 443 f"Subprocess exited due to an error:\n{text_err}")
444 444
445 445 if ret_code_fail and fail_on_return_code:
446 446 text_err = err.decode()
447 447 if not err:
448 448 # we may get an empty stderr; try stdout instead,
449 449 # as in many cases git reports errors on stdout too
450 450 text_err = out.decode()
451 451 raise OSError(
452 452 f"Subprocess exited with non 0 ret code:{return_code}: stderr:{text_err}")
453 453
454 454 self.stdout = bg_out
455 455 self.stderr = bg_err
456 456 self.inputstream = input_stream
457 457
458 458 def __str__(self):
459 459 proc = getattr(self, 'process', 'NO_PROCESS')
460 460 return f'SubprocessIOChunker: {proc}'
461 461
462 462 def __iter__(self):
463 463 return self
464 464
465 465 def __next__(self):
466 466 # Note: mikhail: We need to be sure that we are checking the return
467 467 # code after the stdout stream is closed. Some processes, e.g. git,
468 468 # do some magic in between closing stdout and terminating the
469 469 # process and, as a result, we do not get a return code on "slow"
470 470 # systems.
471 471 result = None
472 472 stop_iteration = None
473 473 try:
474 474 result = next(self.stdout)
475 475 except StopIteration as e:
476 476 stop_iteration = e
477 477
478 478 if self.process:
479 479 return_code = self.process.poll()
480 480 ret_code_fail = return_code is not None and return_code != 0
481 481 if ret_code_fail and self._fail_on_return_code:
482 482 self.stop_streams()
483 483 err = self.get_stderr()
484 484 raise OSError(
485 485 f"Subprocess exited (exit_code:{return_code}) due to an error during iteration:\n{err}")
486 486
487 487 if stop_iteration:
488 488 raise stop_iteration
489 489 return result
490 490
491 491 def throw(self, exc_type, value=None, traceback=None):
492 492 if self.stdout.length or not self.stdout.done_reading:
493 493 raise exc_type(value)
494 494
495 495 def close(self):
496 496 if self._closed:
497 497 return
498 498
499 499 try:
500 500 self.process.terminate()
501 501 except Exception:
502 502 pass
503 503 if self._close_input_fd:
504 504 os.close(self._close_input_fd)
505 505 try:
506 506 self.stdout.close()
507 507 except Exception:
508 508 pass
509 509 try:
510 510 self.stderr.close()
511 511 except Exception:
512 512 pass
513 513 try:
514 514 os.close(self.inputstream)
515 515 except Exception:
516 516 pass
517 517
518 518 self._closed = True
519 519
520 520 def stop_streams(self):
521 521 getattr(self.stdout, 'stop', lambda: None)()
522 522 getattr(self.stderr, 'stop', lambda: None)()
523 523
524 524 def get_stdout(self):
525 525 if self._stdout:
526 526 return self._stdout
527 527 else:
528 528 return b''.join(self.stdout)
529 529
530 530 def get_stderr(self):
531 531 if self._stderr:
532 532 return self._stderr
533 533 else:
534 534 return b''.join(self.stderr)
535 535
536 536
537 537 def run_command(arguments, env=None):
538 538 """
539 539 Run the specified command and return the stdout.
540 540
541 541 :param arguments: sequence of program arguments (including the program name)
542 542 :type arguments: list[str]
543 543 """
544 544
545 545 cmd = arguments
546 546 log.debug('Running subprocessio command %s', cmd)
547 547 proc = None
548 548 try:
549 549 _opts = {'shell': False, 'fail_on_stderr': False}
550 550 if env:
551 551 _opts.update({'env': env})
552 552 proc = SubprocessIOChunker(cmd, **_opts)
553 553 return b''.join(proc), b''.join(proc.stderr)
554 554 except OSError as err:
555 555 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
556 556 tb_err = ("Couldn't run subprocessio command (%s).\n"
557 557 "Original error was:%s\n" % (cmd, err))
558 558 log.exception(tb_err)
559 559 raise Exception(tb_err)
560 560 finally:
561 561 if proc:
562 562 proc.close()
563 563
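
Typical usage of run_command, assuming `git` is available on PATH; with shell=False the arguments are passed as a list:

    stdout, stderr = run_command(['git', '--version'])
    assert stdout.startswith(b'git version')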
@@ -1,257 +1,257 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import threading
19 19 import msgpack
20 20
21 21 from http.server import BaseHTTPRequestHandler
22 22 from socketserver import TCPServer
23 23
24 24 import mercurial.ui
25 25 import mock
26 26 import pytest
27 27
28 28 from vcsserver.hooks import HooksHttpClient
29 from vcsserver.lib.rc_json import json
29 from vcsserver.lib.ext_json import json
30 30 from vcsserver import hooks
31 31
32 32
33 33 def get_hg_ui(extras=None):
34 34 """Create a Config object with a valid RC_SCM_DATA entry."""
35 35 extras = extras or {}
36 36 required_extras = {
37 37 'username': '',
38 38 'repository': '',
39 39 'locked_by': '',
40 40 'scm': '',
41 41 'make_lock': '',
42 42 'action': '',
43 43 'ip': '',
44 44 'hooks_uri': 'fake_hooks_uri',
45 45 }
46 46 required_extras.update(extras)
47 47 hg_ui = mercurial.ui.ui()
48 48 hg_ui.setconfig(b'rhodecode', b'RC_SCM_DATA', json.dumps(required_extras))
49 49
50 50 return hg_ui
51 51
52 52
53 53 def test_git_pre_receive_is_disabled():
54 54 extras = {'hooks': ['pull']}
55 55 response = hooks.git_pre_receive(None, None,
56 56 {'RC_SCM_DATA': json.dumps(extras)})
57 57
58 58 assert response == 0
59 59
60 60
61 61 def test_git_post_receive_is_disabled():
62 62 extras = {'hooks': ['pull']}
63 63 response = hooks.git_post_receive(None, '',
64 64 {'RC_SCM_DATA': json.dumps(extras)})
65 65
66 66 assert response == 0
67 67
68 68
69 69 def test_git_post_receive_calls_repo_size():
70 70 extras = {'hooks': ['push', 'repo_size']}
71 71
72 72 with mock.patch.object(hooks, '_call_hook') as call_hook_mock:
73 73 hooks.git_post_receive(
74 74 None, '', {'RC_SCM_DATA': json.dumps(extras)})
75 75 extras.update({'commit_ids': [], 'hook_type': 'post_receive',
76 76 'new_refs': {'bookmarks': [], 'branches': [], 'tags': []}})
77 77 expected_calls = [
78 78 mock.call('repo_size', extras, mock.ANY),
79 79 mock.call('post_push', extras, mock.ANY),
80 80 ]
81 81 assert call_hook_mock.call_args_list == expected_calls
82 82
83 83
84 84 def test_git_post_receive_does_not_call_disabled_repo_size():
85 85 extras = {'hooks': ['push']}
86 86
87 87 with mock.patch.object(hooks, '_call_hook') as call_hook_mock:
88 88 hooks.git_post_receive(
89 89 None, '', {'RC_SCM_DATA': json.dumps(extras)})
90 90 extras.update({'commit_ids': [], 'hook_type': 'post_receive',
91 91 'new_refs': {'bookmarks': [], 'branches': [], 'tags': []}})
92 92 expected_calls = [
93 93 mock.call('post_push', extras, mock.ANY)
94 94 ]
95 95 assert call_hook_mock.call_args_list == expected_calls
96 96
97 97
98 98 def test_repo_size_exception_does_not_affect_git_post_receive():
99 99 extras = {'hooks': ['push', 'repo_size']}
100 100 status = 0
101 101
102 102 def side_effect(name, *args, **kwargs):
103 103 if name == 'repo_size':
104 104 raise Exception('Fake exception')
105 105 else:
106 106 return status
107 107
108 108 with mock.patch.object(hooks, '_call_hook') as call_hook_mock:
109 109 call_hook_mock.side_effect = side_effect
110 110 result = hooks.git_post_receive(
111 111 None, '', {'RC_SCM_DATA': json.dumps(extras)})
112 112 assert result == status
113 113
114 114
115 115 def test_git_pre_pull_is_disabled():
116 116 assert hooks.git_pre_pull({'hooks': ['push']}) == hooks.HookResponse(0, '')
117 117
118 118
119 119 def test_git_post_pull_is_disabled():
120 120 assert (
121 121 hooks.git_post_pull({'hooks': ['push']}) == hooks.HookResponse(0, ''))
122 122
123 123
124 124 class TestGetHooksClient:
125 125
126 126 def test_returns_http_client_when_protocol_matches(self):
127 127 hooks_uri = 'localhost:8000'
128 128 result = hooks._get_hooks_client({
129 129 'hooks_uri': hooks_uri,
130 130 'hooks_protocol': 'http'
131 131 })
132 132 assert isinstance(result, hooks.HooksHttpClient)
133 133 assert result.hooks_uri == hooks_uri
134 134
135 135 def test_return_celery_client_when_queue_and_backend_provided(self):
136 136 task_queue = 'redis://task_queue:0'
137 137 task_backend = task_queue
138 138 result = hooks._get_hooks_client({
139 139 'task_queue': task_queue,
140 140 'task_backend': task_backend
141 141 })
142 142 assert isinstance(result, hooks.HooksCeleryClient)
143 143
144 144
145 145 class TestHooksHttpClient:
146 146 def test_init_sets_hooks_uri(self):
147 147 uri = 'localhost:3000'
148 148 client = hooks.HooksHttpClient(uri)
149 149 assert client.hooks_uri == uri
150 150
151 151 def test_serialize_returns_serialized_string(self):
152 152 client = hooks.HooksHttpClient('localhost:3000')
153 153 hook_name = 'test'
154 154 extras = {
155 155 'first': 1,
156 156 'second': 'two'
157 157 }
158 158 hooks_proto, result = client._serialize(hook_name, extras)
159 159 expected_result = msgpack.packb({
160 160 'method': hook_name,
161 161 'extras': extras,
162 162 })
163 163 assert hooks_proto == {'rc-hooks-protocol': 'msgpack.v1', 'Connection': 'keep-alive'}
164 164 assert result == expected_result
165 165
166 166 def test_call_queries_http_server(self, http_mirror):
167 167 client = hooks.HooksHttpClient(http_mirror.uri)
168 168 hook_name = 'test'
169 169 extras = {
170 170 'first': 1,
171 171 'second': 'two'
172 172 }
173 173 result = client(hook_name, extras)
174 174 expected_result = msgpack.unpackb(msgpack.packb({
175 175 'method': hook_name,
176 176 'extras': extras
177 177 }), raw=False)
178 178 assert result == expected_result
179 179
180 180
181 181 @pytest.fixture
182 182 def http_mirror(request):
183 183 server = MirrorHttpServer()
184 184 request.addfinalizer(server.stop)
185 185 return server
186 186
187 187
188 188 class MirrorHttpHandler(BaseHTTPRequestHandler):
189 189
190 190 def do_POST(self):
191 191 length = int(self.headers['Content-Length'])
192 192 body = self.rfile.read(length)
193 193 self.send_response(200)
194 194 self.end_headers()
195 195 self.wfile.write(body)
196 196
197 197
198 198 class MirrorHttpServer:
199 199 ip_address = '127.0.0.1'
200 200 port = 0
201 201
202 202 def __init__(self):
203 203 self._daemon = TCPServer((self.ip_address, 0), MirrorHttpHandler)
204 204 _, self.port = self._daemon.server_address
205 205 self._thread = threading.Thread(target=self._daemon.serve_forever)
206 206 self._thread.daemon = True
207 207 self._thread.start()
208 208
209 209 def stop(self):
210 210 self._daemon.shutdown()
211 211 self._thread.join()
212 212 self._daemon = None
213 213 self._thread = None
214 214
215 215 @property
216 216 def uri(self):
217 217 return '{}:{}'.format(self.ip_address, self.port)
218 218
219 219
220 220 def test_hooks_http_client_init():
221 221 hooks_uri = 'http://localhost:8000'
222 222 client = HooksHttpClient(hooks_uri)
223 223 assert client.hooks_uri == hooks_uri
224 224
225 225
226 226 def test_hooks_http_client_call():
227 227 hooks_uri = 'http://localhost:8000'
228 228
229 229 method = 'test_method'
230 230 extras = {'key': 'value'}
231 231
232 232 with \
233 233 mock.patch('http.client.HTTPConnection') as mock_connection,\
234 234 mock.patch('msgpack.load') as mock_load:
235 235
236 236 client = HooksHttpClient(hooks_uri)
237 237
238 238 mock_load.return_value = {'result': 'success'}
239 239 response = mock.MagicMock()
240 240 response.status = 200
241 241 mock_connection.request.side_effect = None
242 242 mock_connection.getresponse.return_value = response
243 243
244 244 result = client(method, extras)
245 245
246 246 mock_connection.assert_called_with(hooks_uri)
247 247 mock_connection.return_value.request.assert_called_once()
248 248 assert result == {'result': 'success'}
249 249
250 250
251 251 def test_hooks_http_client_serialize():
252 252 method = 'test_method'
253 253 extras = {'key': 'value'}
254 254 headers, body = HooksHttpClient._serialize(method, extras)
255 255
256 256 assert headers == {'rc-hooks-protocol': HooksHttpClient.proto, 'Connection': 'keep-alive'}
257 257 assert msgpack.unpackb(body) == {'method': method, 'extras': extras}
@@ -1,289 +1,289 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import sys
20 20 import stat
21 21 import pytest
22 22 import vcsserver
23 23 import tempfile
24 24 from vcsserver import hook_utils
25 25 from vcsserver.hook_utils import set_permissions_if_needed, HOOKS_DIR_MODE, HOOKS_FILE_MODE
26 26 from vcsserver.tests.fixture import no_newline_id_generator
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28 from vcsserver.utils import AttributeDict
29 29
30 30
31 31 class TestCheckRhodecodeHook:
32 32
33 33 def test_returns_false_when_hook_file_is_wrong_found(self, tmpdir):
34 34 hook = os.path.join(str(tmpdir), 'fake_hook_file.py')
35 35 with open(hook, 'wb') as f:
36 36 f.write(b'dummy test')
37 37 result = hook_utils.check_rhodecode_hook(hook)
38 38 assert result is False
39 39
40 40 def test_returns_true_when_no_hook_file_found(self, tmpdir):
41 41 hook = os.path.join(str(tmpdir), 'fake_hook_file_not_existing.py')
42 42 result = hook_utils.check_rhodecode_hook(hook)
43 43 assert result
44 44
45 45 @pytest.mark.parametrize("file_content, expected_result", [
46 46 ("RC_HOOK_VER = '3.3.3'\n", True),
47 47 ("RC_HOOK = '3.3.3'\n", False),
48 48 ], ids=no_newline_id_generator)
49 49 def test_signatures(self, file_content, expected_result, tmpdir):
50 50 hook = os.path.join(str(tmpdir), 'fake_hook_file_1.py')
51 51 with open(hook, 'wb') as f:
52 52 f.write(safe_bytes(file_content))
53 53
54 54 result = hook_utils.check_rhodecode_hook(hook)
55 55
56 56 assert result is expected_result
57 57
58 58
59 59 class BaseInstallHooks:
60 60 HOOK_FILES = ()
61 61
62 62 def _check_hook_file_dir_mode(self, file_path):
63 63 dir_path = os.path.dirname(file_path)
64 64 assert os.path.exists(dir_path), f'dir {dir_path} missing'
65 65 stat_info = os.stat(dir_path)
66 66
67 67 file_mode = stat.S_IMODE(stat_info.st_mode)
68 68 expected_mode = int(HOOKS_DIR_MODE)
69 69 assert expected_mode == file_mode, f'expected mode: {oct(expected_mode)} got: {oct(file_mode)} for {dir_path}'
70 70
71 71 def _check_hook_file_mode(self, file_path):
72 72 assert os.path.exists(file_path), f'path {file_path} missing'
73 73 stat_info = os.stat(file_path)
74 74
75 75 file_mode = stat.S_IMODE(stat_info.st_mode)
76 76 expected_mode = int(HOOKS_FILE_MODE)
77 77 assert expected_mode == file_mode, f'expected mode: {oct(expected_mode)} got: {oct(file_mode)} for {file_path}'
78 78
79 79 def _check_hook_file_content(self, file_path, executable):
80 80 executable = executable or sys.executable
81 81 with open(file_path, 'rt') as hook_file:
82 82 content = hook_file.read()
83 83
84 84 expected_env = '#!{}'.format(executable)
85 85 expected_rc_version = "\nRC_HOOK_VER = '{}'\n".format(vcsserver.get_version())
86 86 assert content.strip().startswith(expected_env)
87 87 assert expected_rc_version in content
88 88
89 89 def _create_fake_hook(self, file_path, content):
90 90 with open(file_path, 'w') as hook_file:
91 91 hook_file.write(content)
92 92
93 93 def create_dummy_repo(self, repo_type):
94 94 tmpdir = tempfile.mkdtemp()
95 95 repo = AttributeDict()
96 96 if repo_type == 'git':
97 97 repo.path = os.path.join(tmpdir, 'test_git_hooks_installation_repo')
98 98 os.makedirs(repo.path)
99 99 os.makedirs(os.path.join(repo.path, 'hooks'))
100 100 repo.bare = True
101 101
102 102 elif repo_type == 'svn':
103 103 repo.path = os.path.join(tmpdir, 'test_svn_hooks_installation_repo')
104 104 os.makedirs(repo.path)
105 105 os.makedirs(os.path.join(repo.path, 'hooks'))
106 106
107 107 return repo
108 108
109 109 def check_hooks(self, repo_path, repo_bare=True):
110 110 for file_name in self.HOOK_FILES:
111 111 if repo_bare:
112 112 file_path = os.path.join(repo_path, 'hooks', file_name)
113 113 else:
114 114 file_path = os.path.join(repo_path, '.git', 'hooks', file_name)
115 115
116 116 self._check_hook_file_dir_mode(file_path)
117 117 self._check_hook_file_mode(file_path)
118 118 self._check_hook_file_content(file_path, sys.executable)
119 119
120 120
121 121 class TestInstallGitHooks(BaseInstallHooks):
122 122 HOOK_FILES = ('pre-receive', 'post-receive')
123 123
124 124 def test_hooks_are_installed(self):
125 125 repo = self.create_dummy_repo('git')
126 126 result = hook_utils.install_git_hooks(repo.path, repo.bare)
127 127 assert result
128 128 self.check_hooks(repo.path, repo.bare)
129 129
130 130 def test_hooks_are_replaced(self):
131 131 repo = self.create_dummy_repo('git')
132 132 hooks_path = os.path.join(repo.path, 'hooks')
133 133 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
134 134 self._create_fake_hook(
135 135 file_path, content="RC_HOOK_VER = 'abcde'\n")
136 136
137 137 result = hook_utils.install_git_hooks(repo.path, repo.bare)
138 138 assert result
139 139 self.check_hooks(repo.path, repo.bare)
140 140
141 141 def test_non_rc_hooks_are_not_replaced(self):
142 142 repo = self.create_dummy_repo('git')
143 143 hooks_path = os.path.join(repo.path, 'hooks')
144 144 non_rc_content = 'echo "non rc hook"\n'
145 145 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
146 146 self._create_fake_hook(
147 147 file_path, content=non_rc_content)
148 148
149 149 result = hook_utils.install_git_hooks(repo.path, repo.bare)
150 150 assert result
151 151
152 152 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
153 153 with open(file_path, 'rt') as hook_file:
154 154 content = hook_file.read()
155 155 assert content == non_rc_content
156 156
157 157 def test_non_rc_hooks_are_replaced_with_force_flag(self):
158 158 repo = self.create_dummy_repo('git')
159 159 hooks_path = os.path.join(repo.path, 'hooks')
160 160 non_rc_content = 'echo "non rc hook"\n'
161 161 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
162 162 self._create_fake_hook(
163 163 file_path, content=non_rc_content)
164 164
165 165 result = hook_utils.install_git_hooks(
166 166 repo.path, repo.bare, force_create=True)
167 167 assert result
168 168 self.check_hooks(repo.path, repo.bare)
169 169
170 170
171 171 class TestInstallSvnHooks(BaseInstallHooks):
172 172 HOOK_FILES = ('pre-commit', 'post-commit')
173 173
174 174 def test_hooks_are_installed(self):
175 175 repo = self.create_dummy_repo('svn')
176 176 result = hook_utils.install_svn_hooks(repo.path)
177 177 assert result
178 178 self.check_hooks(repo.path)
179 179
180 180 def test_hooks_are_replaced(self):
181 181 repo = self.create_dummy_repo('svn')
182 182 hooks_path = os.path.join(repo.path, 'hooks')
183 183 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
184 184 self._create_fake_hook(
185 185 file_path, content="RC_HOOK_VER = 'abcde'\n")
186 186
187 187 result = hook_utils.install_svn_hooks(repo.path)
188 188 assert result
189 189 self.check_hooks(repo.path)
190 190
191 191 def test_non_rc_hooks_are_not_replaced(self):
192 192 repo = self.create_dummy_repo('svn')
193 193 hooks_path = os.path.join(repo.path, 'hooks')
194 194 non_rc_content = 'echo "non rc hook"\n'
195 195 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
196 196 self._create_fake_hook(
197 197 file_path, content=non_rc_content)
198 198
199 199 result = hook_utils.install_svn_hooks(repo.path)
200 200 assert result
201 201
202 202 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
203 203 with open(file_path, 'rt') as hook_file:
204 204 content = hook_file.read()
205 205 assert content == non_rc_content
206 206
207 207 def test_non_rc_hooks_are_replaced_with_force_flag(self):
208 208 repo = self.create_dummy_repo('svn')
209 209 hooks_path = os.path.join(repo.path, 'hooks')
210 210 non_rc_content = 'echo "non rc hook"\n'
211 211 for file_path in [os.path.join(hooks_path, f) for f in self.HOOK_FILES]:
212 212 self._create_fake_hook(
213 213 file_path, content=non_rc_content)
214 214
215 215 result = hook_utils.install_svn_hooks(
216 216 repo.path, force_create=True)
217 217 assert result
218 218 self.check_hooks(repo.path)
219 219
220 220
221 221 def create_test_file(filename):
222 222 """Utility function to create a test file."""
223 223 with open(filename, 'w') as f:
224 224 f.write("Test file")
225 225
226 226
227 227 def remove_test_file(filename):
228 228 """Utility function to remove a test file."""
229 229 if os.path.exists(filename):
230 230 os.remove(filename)
231 231
232 232
233 233 @pytest.fixture
234 234 def test_file():
235 235 filename = 'test_file.txt'
236 236 create_test_file(filename)
237 237 yield filename
238 238 remove_test_file(filename)
239 239
240 240
241 241 def test_increase_permissions(test_file):
242 242 # Set initial lower permissions
243 243 initial_perms = 0o644
244 244 os.chmod(test_file, initial_perms)
245 245
246 246 # Set higher permissions
247 247 new_perms = 0o666
248 248 set_permissions_if_needed(test_file, new_perms)
249 249
250 250 # Check if permissions were updated
251 251 assert (os.stat(test_file).st_mode & 0o777) == new_perms
252 252
253 253
254 254 def test_no_permission_change_needed(test_file):
255 255 # Set initial permissions
256 256 initial_perms = 0o666
257 257 os.chmod(test_file, initial_perms)
258 258
259 259 # Attempt to set the same permissions
260 260 set_permissions_if_needed(test_file, initial_perms)
261 261
262 262 # Check if permissions were unchanged
263 263 assert (os.stat(test_file).st_mode & 0o777) == initial_perms
264 264
265 265
266 266 def test_no_permission_reduction(test_file):
267 267 # Set initial higher permissions
268 268 initial_perms = 0o666
269 269 os.chmod(test_file, initial_perms)
270 270
271 271 # Attempt to set lower permissions
272 272 lower_perms = 0o644
273 273 set_permissions_if_needed(test_file, lower_perms)
274 274
275 275 # Check if permissions were not reduced
276 276 assert (os.stat(test_file).st_mode & 0o777) == initial_perms
277 277
278 278
279 279 def test_no_permission_reduction_when_on_777(test_file):
280 280 # Set initial higher permissions
281 281 initial_perms = 0o777
282 282 os.chmod(test_file, initial_perms)
283 283
284 284 # Attempt to set lower permissions
285 285 lower_perms = 0o755
286 286 set_permissions_if_needed(test_file, lower_perms)
287 287
288 288 # Check if permissions were not reduced
289 289 assert (os.stat(test_file).st_mode & 0o777) == initial_perms
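
The tests above pin down the semantics of set_permissions_if_needed: requested permission bits are added, but existing bits are never dropped. A minimal sketch with those semantics (the real implementation lives in vcsserver.hook_utils and may differ):

    import os
    import stat

    def set_permissions_if_needed_sketch(path_to_file, perms):
        # union of current and requested bits; never lowers permissions
        current = stat.S_IMODE(os.stat(path_to_file).st_mode)
        target = current | perms
        if target != current:
            os.chmod(path_to_file, target)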
@@ -1,295 +1,295 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import more_itertools
20 20
21 21 import dulwich.protocol
22 22 import mock
23 23 import pytest
24 24 import webob
25 25 import webtest
26 26
27 27 from vcsserver import hooks, pygrack
28 28
29 from vcsserver.str_utils import ascii_bytes
29 from vcsserver.lib.str_utils import ascii_bytes
30 30
31 31
32 32 @pytest.fixture()
33 33 def pygrack_instance(tmpdir):
34 34 """
35 35 Creates a pygrack app instance.
36 36
37 37 Right now, it does not do much with the passed directory.
38 38 It just contains the required folders to pass the signature test.
39 39 """
40 40 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
41 41 tmpdir.mkdir(dir_name)
42 42
43 43 return pygrack.GitRepository('repo_name', str(tmpdir), 'git', False, {})
44 44
45 45
46 46 @pytest.fixture()
47 47 def pygrack_app(pygrack_instance):
48 48 """
49 49 Creates a pygrack app wrapped in webtest.TestApp.
50 50 """
51 51 return webtest.TestApp(pygrack_instance)
52 52
53 53
54 54 def test_invalid_service_info_refs_returns_403(pygrack_app):
55 55 response = pygrack_app.get('/info/refs?service=git-upload-packs',
56 56 expect_errors=True)
57 57
58 58 assert response.status_int == 403
59 59
60 60
61 61 def test_invalid_endpoint_returns_403(pygrack_app):
62 62 response = pygrack_app.post('/git-upload-packs', expect_errors=True)
63 63
64 64 assert response.status_int == 403
65 65
66 66
67 67 @pytest.mark.parametrize('sideband', [
68 68 'side-band-64k',
69 69 'side-band',
70 70 'side-band no-progress',
71 71 ])
72 72 def test_pre_pull_hook_fails_with_sideband(pygrack_app, sideband):
73 73 request = ''.join([
74 74 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ',
75 75 f'multi_ack {sideband} ofs-delta\n',
76 76 '0000',
77 77 '0009done\n',
78 78 ])
79 79 with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')):
80 80 response = pygrack_app.post(
81 81 '/git-upload-pack', params=request,
82 82 content_type='application/x-git-upload-pack')
83 83
84 84 data = io.BytesIO(response.body)
85 85 proto = dulwich.protocol.Protocol(data.read, None)
86 86 packets = list(proto.read_pkt_seq())
87 87
88 88 expected_packets = [
89 89 b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n',
90 90 b'\x01' + pygrack.GitRepository.EMPTY_PACK,
91 91 ]
92 92 assert packets == expected_packets
93 93
94 94
95 95 def test_pre_pull_hook_fails_no_sideband(pygrack_app):
96 96 request = ''.join([
97 97 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
98 98 'multi_ack ofs-delta\n'
99 99 '0000',
100 100 '0009done\n',
101 101 ])
102 102 with mock.patch('vcsserver.hooks.git_pre_pull',
103 103 return_value=hooks.HookResponse(1, 'foo')):
104 104 response = pygrack_app.post(
105 105 '/git-upload-pack', params=request,
106 106 content_type='application/x-git-upload-pack')
107 107
108 108 assert response.body == pygrack.GitRepository.EMPTY_PACK
109 109
110 110
111 111 def test_pull_has_hook_messages(pygrack_app):
112 112 request = ''.join([
113 113 '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
114 114 'multi_ack side-band-64k ofs-delta\n'
115 115 '0000',
116 116 '0009done\n',
117 117 ])
118 118
119 119 pre_pull = 'pre_pull_output'
120 120 post_pull = 'post_pull_output'
121 121
122 122 with mock.patch('vcsserver.hooks.git_pre_pull',
123 123 return_value=hooks.HookResponse(0, pre_pull)):
124 124 with mock.patch('vcsserver.hooks.git_post_pull',
125 125 return_value=hooks.HookResponse(1, post_pull)):
126 126 with mock.patch('vcsserver.subprocessio.SubprocessIOChunker',
127 127 return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])):
128 128 response = pygrack_app.post(
129 129 '/git-upload-pack', params=request,
130 130 content_type='application/x-git-upload-pack')
131 131
132 132 data = io.BytesIO(response.body)
133 133 proto = dulwich.protocol.Protocol(data.read, None)
134 134 packets = list(proto.read_pkt_seq())
135 135
136 136 assert packets == [b'NAK\n',
137 137 # pre-pull only outputs if IT FAILS as in != 0 ret code
138 138 #b'\x02pre_pull_output',
139 139 b'subp\n',
140 140 b'\x02post_pull_output']
141 141
142 142
143 143 def test_get_want_capabilities(pygrack_instance):
144 144 data = io.BytesIO(
145 145 b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' +
146 146 b'multi_ack side-band-64k ofs-delta\n00000009done\n')
147 147
148 148 request = webob.Request({
149 149 'wsgi.input': data,
150 150 'REQUEST_METHOD': 'POST',
151 151 'webob.is_body_seekable': True
152 152 })
153 153
154 154 capabilities = pygrack_instance._get_want_capabilities(request)
155 155
156 156 assert capabilities == frozenset(
157 157 (b'ofs-delta', b'multi_ack', b'side-band-64k'))
158 158 assert data.tell() == 0
159 159
160 160
161 161 @pytest.mark.parametrize('data,capabilities,expected', [
162 162 ('foo', [], []),
163 163 ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []),
164 164 ('', [pygrack.CAPABILITY_SIDE_BAND], []),
165 165 ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']),
166 166 ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']),
167 167 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]),
168 168 ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']),
169 169 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']),
170 170 ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]),
171 171 ], ids=[
172 172 'foo-empty',
173 173 'empty-64k', 'empty',
174 174 'foo-64k', 'foo',
175 175 'f-1000-64k', 'f-1000',
176 176 'f-65520-64k', 'f-65520'])
177 177 def test_get_messages(pygrack_instance, data, capabilities, expected):
178 178 messages = pygrack_instance._get_messages(data, capabilities)
179 179
180 180 assert messages == expected
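
The expected values above follow git's pkt-line framing: a 4-hex-digit length that includes the 4-byte header itself, then a sideband channel byte (\x02 is the progress channel) and the payload. Reproducing the '0008\x02foo' case:

    payload = b'\x02' + b'foo'
    pkt = ('%04x' % (len(payload) + 4)).encode() + payload
    assert pkt == b'0008\x02foo'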
181 181
182 182
183 183 @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [
184 184 # Unexpected response
185 185 ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'),
186 186 # No sideband
187 187 ([b'no-sideband'], [], 'foo', 'bar'),
188 188 # No messages
189 189 ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''),
190 190 ])
191 191 def test_inject_messages_to_response_nothing_to_do(
192 192 pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages):
193 193
194 194 new_response = pygrack_instance._build_post_pull_response(
195 195 more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages)
196 196
197 197 assert list(new_response) == response
198 198
199 199
200 200 @pytest.mark.parametrize('capabilities', [
201 201 [pygrack.CAPABILITY_SIDE_BAND],
202 202 [pygrack.CAPABILITY_SIDE_BAND_64K],
203 203 ])
204 204 def test_inject_messages_to_response_single_element(pygrack_instance, capabilities):
205 205 response = [b'0008NAK\n0009subp\n0000']
206 206 new_response = pygrack_instance._build_post_pull_response(
207 207 more_itertools.always_iterable(response), capabilities, 'foo', 'bar')
208 208
209 209 expected_response = b''.join([
210 210 b'0008NAK\n',
211 211 b'0008\x02foo',
212 212 b'0009subp\n',
213 213 b'0008\x02bar',
214 214 b'0000'])
215 215
216 216 assert b''.join(new_response) == expected_response
217 217
218 218
219 219 @pytest.mark.parametrize('capabilities', [
220 220 [pygrack.CAPABILITY_SIDE_BAND],
221 221 [pygrack.CAPABILITY_SIDE_BAND_64K],
222 222 ])
223 223 def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities):
224 224 response = more_itertools.always_iterable([
225 225 b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000'
226 226 ])
227 227 new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar')
228 228
229 229 expected_response = b''.join([
230 230 b'0008NAK\n',
231 231 b'0008\x02foo',
232 232 b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n',
233 233 b'0008\x02bar',
234 234 b'0000'
235 235 ])
236 236
237 237 assert b''.join(new_response) == expected_response
238 238
239 239
240 240 def test_build_failed_pre_pull_response_no_sideband(pygrack_instance):
241 241 response = pygrack_instance._build_failed_pre_pull_response([], 'foo')
242 242
243 243 assert response == [pygrack.GitRepository.EMPTY_PACK]
244 244
245 245
246 246 @pytest.mark.parametrize('capabilities', [
247 247 [pygrack.CAPABILITY_SIDE_BAND],
248 248 [pygrack.CAPABILITY_SIDE_BAND_64K],
249 249 [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'],
250 250 ])
251 251 def test_build_failed_pre_pull_response(pygrack_instance, capabilities):
252 252 response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo')
253 253
254 254 expected_response = [
255 255 b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n',
256 256 b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK),
257 257 pygrack.GitRepository.FLUSH_PACKET,
258 258 ]
259 259
260 260 assert response == expected_response
261 261
262 262
263 263 def test_inject_messages_to_response_generator(pygrack_instance):
264 264
265 265 def response_generator():
266 266 response = [
267 267 # protocol start
268 268 b'0008NAK\n',
269 269 ]
270 270 response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)]
271 271 response += [
272 272 # protocol end
273 273 pygrack.GitRepository.FLUSH_PACKET
274 274 ]
275 275 for elem in response:
276 276 yield elem
277 277
278 278 new_response = pygrack_instance._build_post_pull_response(
279 279 response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n')
280 280
281 281 assert iter(new_response)
282 282
283 283 expected_response = b''.join([
284 284 # start
285 285 b'0008NAK\n0012\x02PRE_PULL_MSG\n',
286 286 ] + [
287 287 # ... rest
288 288 ascii_bytes(f'000asubp{x}\n') for x in range(1000)
289 289 ] + [
290 290 # final message,
291 291 b'0013\x02POST_PULL_MSG\n0000',
292 292
293 293 ])
294 294
295 295 assert b''.join(new_response) == expected_response
@@ -1,87 +1,87 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19
20 20 import mercurial.hg
21 21 import mercurial.ui
22 22 import mercurial.error
23 23 import mock
24 24 import pytest
25 25 import webtest
26 26
27 27 from vcsserver import scm_app
28 from vcsserver.str_utils import ascii_bytes
28 from vcsserver.lib.str_utils import ascii_bytes
29 29
30 30
31 31 def test_hg_does_not_accept_invalid_cmd(tmpdir):
32 32 repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True)
33 33 app = webtest.TestApp(scm_app.HgWeb(repo))
34 34
35 35 response = app.get('/repo?cmd=invalidcmd', expect_errors=True)
36 36
37 37 assert response.status_int == 400
38 38
39 39
40 40 def test_create_hg_wsgi_app_requirement_error(tmpdir):
41 41 repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True)
42 42 config = (
43 43 ('paths', 'default', ''),
44 44 )
45 45 with mock.patch('vcsserver.scm_app.HgWeb') as hgweb_mock:
46 46 hgweb_mock.side_effect = mercurial.error.RequirementError()
47 47 with pytest.raises(Exception):
48 48 scm_app.create_hg_wsgi_app(str(tmpdir), repo, config)
49 49
50 50
51 51 def test_git_returns_not_found(tmpdir):
52 52 app = webtest.TestApp(
53 53 scm_app.GitHandler(str(tmpdir), 'repo_name', 'git', False, {}))
54 54
55 55 response = app.get('/repo_name/inforefs?service=git-upload-pack',
56 56 expect_errors=True)
57 57
58 58 assert response.status_int == 404
59 59
60 60
61 61 def test_git(tmpdir):
62 62 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
63 63 tmpdir.mkdir(dir_name)
64 64
65 65 app = webtest.TestApp(
66 66 scm_app.GitHandler(str(tmpdir), 'repo_name', 'git', False, {}))
67 67
68 68 # We set service to git-upload-packs to trigger a 403
69 69 response = app.get('/repo_name/inforefs?service=git-upload-packs',
70 70 expect_errors=True)
71 71
72 72 assert response.status_int == 403
73 73
74 74
75 75 def test_git_fallbacks_to_git_folder(tmpdir):
76 76 tmpdir.mkdir('.git')
77 77 for dir_name in ('config', 'head', 'info', 'objects', 'refs'):
78 78 tmpdir.mkdir(os.path.join('.git', dir_name))
79 79
80 80 app = webtest.TestApp(
81 81 scm_app.GitHandler(str(tmpdir), 'repo_name', 'git', False, {}))
82 82
83 83 # We set service to git-upload-packs to trigger a 403
84 84 response = app.get('/repo_name/inforefs?service=git-upload-packs',
85 85 expect_errors=True)
86 86
87 87 assert response.status_int == 403
@@ -1,155 +1,155 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21
22 22 import pytest
23 23
24 24 from vcsserver import subprocessio
25 from vcsserver.str_utils import ascii_bytes
25 from vcsserver.lib.str_utils import ascii_bytes
26 26
27 27
28 28 class FileLikeObj: # pragma: no cover
29 29
30 30 def __init__(self, data: bytes, size):
31 31 chunks = size // len(data)
32 32
33 33 self.stream = self._get_stream(data, chunks)
34 34
35 35 def _get_stream(self, data, chunks):
36 36 for x in range(chunks):
37 37 yield data
38 38
39 39 def read(self, n):
40 40
41 41 buffer_stream = b''
42 42 for chunk in self.stream:
43 43 buffer_stream += chunk
44 44 if len(buffer_stream) >= n:
45 45 break
46 46
47 47 # self.stream = self.bytes[n:]
48 48 return buffer_stream
49 49
50 50
51 51 @pytest.fixture(scope='module')
52 52 def environ():
53 53 """Delete coverage variables, as they make the tests fail."""
54 54 env = dict(os.environ)
55 55 for key in list(env.keys()):
56 56 if key.startswith('COV_CORE_'):
57 57 del env[key]
58 58
59 59 return env
60 60
61 61
62 62 def _get_python_args(script):
63 63 return [sys.executable, '-c', 'import sys; import time; import shutil; ' + script]
64 64
65 65
66 66 def test_raise_exception_on_non_zero_return_code(environ):
67 67 call_args = _get_python_args('raise ValueError("fail")')
68 68 with pytest.raises(OSError):
69 69 b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ))
70 70
71 71
72 72 def test_does_not_fail_on_non_zero_return_code(environ):
73 73 call_args = _get_python_args('sys.stdout.write("hello"); sys.exit(1)')
74 74 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ)
75 75 output = b''.join(proc)
76 76
77 77 assert output == b'hello'
78 78
79 79
80 80 def test_raise_exception_on_stderr(environ):
81 81 call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); time.sleep(1);')
82 82
83 83 with pytest.raises(OSError) as excinfo:
84 84 b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ))
85 85
86 86 assert 'exited due to an error:\nWRITE_TO_STDERR' in str(excinfo.value)
87 87
88 88
89 89 def test_does_not_fail_on_stderr(environ):
90 90 call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); sys.stderr.flush(); time.sleep(2);')
91 91 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_stderr=False, env=environ)
92 92 output = b''.join(proc)
93 93
94 94 assert output == b''
95 95
96 96
97 97 @pytest.mark.parametrize('size', [
98 98 1,
99 99 10 ** 5
100 100 ])
101 101 def test_output_with_no_input(size, environ):
102 102 call_args = _get_python_args(f'sys.stdout.write("X" * {size});')
103 103 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ)
104 104 output = b''.join(proc)
105 105
106 106 assert output == ascii_bytes("X" * size)
107 107
108 108
109 109 @pytest.mark.parametrize('size', [
110 110 1,
111 111 10 ** 5
112 112 ])
113 113 def test_output_with_no_input_does_not_fail(size, environ):
114 114
115 115 call_args = _get_python_args(f'sys.stdout.write("X" * {size}); sys.exit(1)')
116 116 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ)
117 117 output = b''.join(proc)
118 118
119 119 assert output == ascii_bytes("X" * size)
120 120
121 121
122 122 @pytest.mark.parametrize('size', [
123 123 1,
124 124 10 ** 5
125 125 ])
126 126 def test_output_with_input(size, environ):
127 127 data_len = size
128 128 inputstream = FileLikeObj(b'X', size)
129 129
130 130 # This acts like the cat command.
131 131 call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)')
132 132 # note: in this test we explicitly don't assign the chunker to a variable and let it stream directly
133 133 output = b''.join(
134 134 subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ)
135 135 )
136 136
137 137 assert len(output) == data_len
138 138
139 139
140 140 @pytest.mark.parametrize('size', [
141 141 1,
142 142 10 ** 5
143 143 ])
144 144 def test_output_with_input_skipping_iterator(size, environ):
145 145 data_len = size
146 146 inputstream = FileLikeObj(b'X', size)
147 147
148 148 # This acts like the cat command.
149 149 call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)')
150 150
151 151 # Note: assigning the chunker makes sure that it is not deleted too early
152 152 proc = subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ)
153 153 output = b''.join(proc.stdout)
154 154
155 155 assert len(output) == data_len
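
Taken together, the tests above pin down the SubprocessIOChunker contract: it is iterable, yields bytes, and raises OSError on a non-zero exit code or on stderr output unless fail_on_return_code / fail_on_stderr disable that. A minimal usage sketch, using only the constructor arguments these tests exercise:

import os
import sys

from vcsserver import subprocessio

args = [sys.executable, '-c', 'import sys; sys.stdout.write("chunked output")']
chunker = subprocessio.SubprocessIOChunker(
    args,
    shell=False,               # args is an argv list, not a shell string
    fail_on_return_code=True,  # raise OSError on a non-zero exit code
    fail_on_stderr=True,       # raise OSError if the child writes to stderr
    env=dict(os.environ),
)
for chunk in chunker:          # chunks are bytes and can be streamed as-is
    sys.stdout.buffer.write(chunk)
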
@@ -1,103 +1,103 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import mock
20 20 import pytest
21 21 import sys
22 22
23 from vcsserver.str_utils import ascii_bytes
23 from vcsserver.lib.str_utils import ascii_bytes
24 24
25 25
26 26 class MockPopen:
27 27 def __init__(self, stderr):
28 28 self.stdout = io.BytesIO(b'')
29 29 self.stderr = io.BytesIO(stderr)
30 30 self.returncode = 1
31 31
32 32 def wait(self):
33 33 pass
34 34
35 35
36 36 INVALID_CERTIFICATE_STDERR = '\n'.join([
37 37 'svnrdump: E230001: Unable to connect to a repository at URL url',
38 38 'svnrdump: E230001: Server SSL certificate verification failed: issuer is not trusted',
39 39 ])
40 40
41 41
42 42 @pytest.mark.parametrize('stderr,expected_reason', [
43 43 (INVALID_CERTIFICATE_STDERR, 'INVALID_CERTIFICATE'),
44 44 ('svnrdump: E123456', 'UNKNOWN:svnrdump: E123456'),
45 45 ], ids=['invalid-cert-stderr', 'svnrdump-err-123456'])
46 46 @pytest.mark.xfail(sys.platform == "cygwin",
47 47 reason="SVN not packaged for Cygwin")
48 48 def test_import_remote_repository_certificate_error(stderr, expected_reason):
49 49 from vcsserver.remote import svn_remote
50 50 factory = mock.Mock()
51 51 factory.repo = mock.Mock(return_value=mock.Mock())
52 52
53 53 remote = svn_remote.SvnRemote(factory)
54 54 remote.is_path_valid_repository = lambda wire, path: True
55 55
56 56 with mock.patch('subprocess.Popen',
57 57 return_value=MockPopen(ascii_bytes(stderr))):
58 58 with pytest.raises(Exception) as excinfo:
59 59 remote.import_remote_repository({'path': 'path'}, 'url')
60 60
61 61 expected_error_args = 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason)
62 62
63 63 assert excinfo.value.args[0] == expected_error_args
64 64
65 65
66 66 def test_svn_libraries_can_be_imported():
67 67 import svn.client # noqa
68 68 assert svn.client is not None
69 69
70 70
71 71 @pytest.mark.parametrize('example_url, parts', [
72 72 ('http://server.com', ('', '', 'http://server.com')),
73 73 ('http://user@server.com', ('user', '', 'http://user@server.com')),
74 74 ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')),
75 75 ('<script>', ('', '', '<script>')),
76 76 ('http://', ('', '', 'http://')),
77 77 ])
78 78 def test_username_password_extraction_from_url(example_url, parts):
79 79 from vcsserver.remote import svn_remote
80 80
81 81 factory = mock.Mock()
82 82 factory.repo = mock.Mock(return_value=mock.Mock())
83 83
84 84 remote = svn_remote.SvnRemote(factory)
85 85 remote.is_path_valid_repository = lambda wire, path: True
86 86
87 87 assert remote.get_url_and_credentials(example_url) == parts
88 88
89 89
90 90 @pytest.mark.parametrize('call_url', [
91 91 b'https://svn.code.sf.net/p/svnbook/source/trunk/',
92 92 b'https://marcink@svn.code.sf.net/p/svnbook/source/trunk/',
93 93 b'https://marcink:qweqwe@svn.code.sf.net/p/svnbook/source/trunk/',
94 94 ])
95 95 def test_check_url(call_url):
96 96 from vcsserver.remote import svn_remote
97 97 factory = mock.Mock()
98 98 factory.repo = mock.Mock(return_value=mock.Mock())
99 99
100 100 remote = svn_remote.SvnRemote(factory)
101 101 remote.is_path_valid_repository = lambda wire, path: True
102 102 assert remote.check_url(call_url, {'dummy': 'config'})
103 103
@@ -1,69 +1,69 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import pytest
19 from vcsserver.str_utils import ascii_bytes, ascii_str, convert_to_str
19 from vcsserver.lib.str_utils import ascii_bytes, ascii_str, convert_to_str
20 20
21 21
22 22 @pytest.mark.parametrize('given, expected', [
23 23 ('a', b'a'),
24 24 ('a', b'a'),
25 25 ])
26 26 def test_ascii_bytes(given, expected):
27 27 assert ascii_bytes(given) == expected
28 28
29 29
30 30 @pytest.mark.parametrize('given', [
31 31 'Γ₯',
32 32 'Γ₯'.encode('utf8')
33 33 ])
34 34 def test_ascii_bytes_raises(given):
35 35 with pytest.raises(ValueError):
36 36 ascii_bytes(given)
37 37
38 38
39 39 @pytest.mark.parametrize('given, expected', [
40 40 (b'a', 'a'),
41 41 ])
42 42 def test_ascii_str(given, expected):
43 43 assert ascii_str(given) == expected
44 44
45 45
46 46 @pytest.mark.parametrize('given', [
47 47 'a',
48 48 'Γ₯'.encode('utf8'),
49 49 'Γ₯'
50 50 ])
51 51 def test_ascii_str_raises(given):
52 52 with pytest.raises(ValueError):
53 53 ascii_str(given)
54 54
55 55
56 56 @pytest.mark.parametrize('given, expected', [
57 57 ('a', 'a'),
58 58 (b'a', 'a'),
59 59 # tuple
60 60 (('a', b'b', b'c'), ('a', 'b', 'c')),
61 61 # nested tuple
62 62 (('a', b'b', (b'd', b'e')), ('a', 'b', ('d', 'e'))),
63 63 # list
64 64 (['a', b'b', b'c'], ['a', 'b', 'c']),
65 65 # mixed
66 66 (['a', b'b', b'c', (b'b1', b'b2')], ['a', 'b', 'c', ('b1', 'b2')])
67 67 ])
68 68 def test_convert_to_str(given, expected):
69 69 assert convert_to_str(given) == expected
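
These tests fix the contract of the ascii helpers: ascii_bytes accepts str only (bytes only with allow_bytes=True, as the wsgi_app_caller module later in this diff uses) and rejects anything outside ASCII. Note that str.encode('ascii') raises UnicodeEncodeError, a ValueError subclass, which is why pytest.raises(ValueError) matches for 'å'. A sketch consistent with these tests; the real vcsserver.lib.str_utils implementation may differ, and ascii_bytes_sketch is a hypothetical name:

def ascii_bytes_sketch(value, allow_bytes: bool = False) -> bytes:
    # Reject bytes input unless explicitly allowed, as the tests require.
    if isinstance(value, bytes):
        if allow_bytes:
            return value
        raise ValueError(f'bytes input is not allowed: {value!r}')
    # UnicodeEncodeError is a ValueError subclass, so non-ASCII input
    # such as 'å' satisfies pytest.raises(ValueError) above.
    return value.encode('ascii')
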
@@ -1,98 +1,98 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import wsgiref.simple_server
19 19 import wsgiref.validate
20 20
21 21 from vcsserver import wsgi_app_caller
22 from vcsserver.str_utils import ascii_bytes, safe_str
22 from vcsserver.lib.str_utils import ascii_bytes, safe_str
23 23
24 24
25 25 @wsgiref.validate.validator
26 26 def demo_app(environ, start_response):
27 27 """WSGI app used for testing."""
28 28
29 29 input_data = safe_str(environ['wsgi.input'].read(1024))
30 30
31 31 data = [
32 32 'Hello World!\n',
33 33 f'input_data={input_data}\n',
34 34 ]
35 35 for key, value in sorted(environ.items()):
36 36 data.append(f'{key}={value}\n')
37 37
38 38 write = start_response("200 OK", [('Content-Type', 'text/plain')])
39 39 write(b'Old school write method\n')
40 40 write(b'***********************\n')
41 41 return list(map(ascii_bytes, data))
42 42
43 43
44 44 BASE_ENVIRON = {
45 45 'REQUEST_METHOD': 'GET',
46 46 'SERVER_NAME': 'localhost',
47 47 'SERVER_PORT': '80',
48 48 'SCRIPT_NAME': '',
49 49 'PATH_INFO': '/',
50 50 'QUERY_STRING': '',
51 51 'foo.var': 'bla',
52 52 }
53 53
54 54
55 55 def test_complete_environ():
56 56 environ = dict(BASE_ENVIRON)
57 57 data = b"data"
58 58 wsgi_app_caller._complete_environ(environ, data)
59 59 wsgiref.validate.check_environ(environ)
60 60
61 61 assert data == environ['wsgi.input'].read(1024)
62 62
63 63
64 64 def test_start_response():
65 65 start_response = wsgi_app_caller._StartResponse()
66 66 status = '200 OK'
67 67 headers = [('Content-Type', 'text/plain')]
68 68 start_response(status, headers)
69 69
70 70 assert status == start_response.status
71 71 assert headers == start_response.headers
72 72
73 73
74 74 def test_start_response_with_error():
75 75 start_response = wsgi_app_caller._StartResponse()
76 76 status = '500 Internal Server Error'
77 77 headers = [('Content-Type', 'text/plain')]
78 78 start_response(status, headers, (None, None, None))
79 79
80 80 assert status == start_response.status
81 81 assert headers == start_response.headers
82 82
83 83
84 84 def test_wsgi_app_caller():
85 85 environ = dict(BASE_ENVIRON)
86 86 input_data = 'some text'
87 87
88 88 caller = wsgi_app_caller.WSGIAppCaller(demo_app)
89 89 responses, status, headers = caller.handle(environ, input_data)
90 90 response = b''.join(responses)
91 91
92 92 assert status == '200 OK'
93 93 assert headers == [('Content-Type', 'text/plain')]
94 94 assert response.startswith(b'Old school write method\n***********************\n')
95 95 assert b'Hello World!\n' in response
96 96 assert b'foo.var=bla\n' in response
97 97
98 98 assert ascii_bytes(f'input_data={input_data}\n') in response
@@ -1,123 +1,123 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import base64
18 18 import logging
19 19 import time
20 20
21 21 import msgpack
22 22
23 23 import vcsserver
24 from vcsserver.str_utils import safe_str
24 from vcsserver.lib.str_utils import safe_str
25 25
26 26 log = logging.getLogger(__name__)
27 27
28 28
29 29 def get_access_path(environ):
30 30 path = environ.get('PATH_INFO')
31 31 return path
32 32
33 33
34 34 def get_user_agent(environ):
35 35 return environ.get('HTTP_USER_AGENT')
36 36
37 37
38 38 def get_call_context(request) -> dict:
39 39 cc = {}
40 40 registry = request.registry
41 41 if hasattr(registry, 'vcs_call_context'):
42 42 cc.update({
43 43 'X-RC-Method': registry.vcs_call_context.get('method'),
44 44 'X-RC-Repo-Name': registry.vcs_call_context.get('repo_name')
45 45 })
46 46
47 47 return cc
48 48
49 49
50 50 def get_headers_call_context(environ, strict=True):
51 51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 53 return msgpack.unpackb(packed_cc)
54 54 elif strict:
55 55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56 56
57 57
58 58 class RequestWrapperTween:
59 59 def __init__(self, handler, registry):
60 60 self.handler = handler
61 61 self.registry = registry
62 62
63 63 # one-time configuration code goes here
64 64
65 65 def __call__(self, request):
66 66 start = time.time()
67 67 log.debug('Starting request time measurement')
68 68 response = None
69 69
70 70 try:
71 71 response = self.handler(request)
72 72 finally:
73 73 ua = get_user_agent(request.environ)
74 74 call_context = get_call_context(request)
75 75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
76 76 repo_name = call_context.get('X-RC-Repo-Name', '')
77 77
78 78 count = request.request_count()
79 79 _ver_ = vcsserver.get_version()
80 80 _path = safe_str(get_access_path(request.environ))
81 81
82 82 ip = '127.0.0.1'
83 83 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
84 84 resp_code = getattr(response, 'status_code', 'UNDEFINED')
85 85
86 86 _view_path = f"{repo_name}@{_path}/{vcs_method}"
87 87
88 88 total = time.time() - start
89 89
90 90 log.info(
91 91 'Req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s',
92 92 count, ip, request.environ.get('REQUEST_METHOD'),
93 93 _view_path, total, ua, _ver_,
94 94 extra={"time": total, "ver": _ver_, "code": resp_code,
95 95 "path": _path, "view_name": match_route, "user_agent": ua,
96 96 "vcs_method": vcs_method, "repo_name": repo_name}
97 97 )
98 98
99 99 statsd = request.registry.statsd
100 100 if statsd:
101 101 match_route = request.matched_route.name if request.matched_route else _path
102 102 elapsed_time_ms = round(1000.0 * total) # use ms only
103 103 statsd.timing(
104 104 "vcsserver_req_timing.histogram", elapsed_time_ms,
105 105 tags=[
106 106 f"view_name:{match_route}",
107 107 f"code:{resp_code}"
108 108 ],
109 109 use_decimals=False
110 110 )
111 111 statsd.incr(
112 112 "vcsserver_req_total", tags=[
113 113 f"view_name:{match_route}",
114 114 f"code:{resp_code}"
115 115 ])
116 116
117 117 return response
118 118
119 119
120 120 def includeme(config):
121 121 config.add_tween(
122 122 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
123 123 )
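
The call-context plumbing in this tween is symmetric: get_headers_call_context() expects the HTTP_X_RC_VCS_STREAM_CALL_CONTEXT header to carry a base64-encoded msgpack mapping. A sketch of how a caller could produce such a header; the payload keys mirror the 'method' and 'repo_name' fields that get_call_context() reads and are an assumption here:

import base64
import msgpack

call_context = {'method': 'pull', 'repo_name': 'some-repo'}  # assumed key names
packed_cc = msgpack.packb(call_context)
environ = {'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT': base64.b64encode(packed_cc)}

# get_headers_call_context(environ) reverses this: base64-decode, then
# msgpack.unpackb(); with strict=True it raises ValueError when the
# header is missing.
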
@@ -1,116 +1,116 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Extract the responses of a WSGI app."""
19 19
20 20 __all__ = ('WSGIAppCaller',)
21 21
22 22 import io
23 23 import logging
24 24 import os
25 25
26 from vcsserver.str_utils import ascii_bytes
26 from vcsserver.lib.str_utils import ascii_bytes
27 27
28 28 log = logging.getLogger(__name__)
29 29
30 30 DEV_NULL = open(os.devnull)
31 31
32 32
33 33 def _complete_environ(environ, input_data: bytes):
34 34 """Update the missing wsgi.* variables of a WSGI environment.
35 35
36 36 :param environ: WSGI environment to update
37 37 :type environ: dict
38 38 :param input_data: data to be read by the app
39 39 :type input_data: bytes
40 40 """
41 41 environ.update({
42 42 'wsgi.version': (1, 0),
43 43 'wsgi.url_scheme': 'http',
44 44 'wsgi.multithread': True,
45 45 'wsgi.multiprocess': True,
46 46 'wsgi.run_once': False,
47 47 'wsgi.input': io.BytesIO(input_data),
48 48 'wsgi.errors': DEV_NULL,
49 49 })
50 50
51 51
52 52 # pylint: disable=too-few-public-methods
53 53 class _StartResponse:
54 54 """Save the arguments of a start_response call."""
55 55
56 56 __slots__ = ['status', 'headers', 'content']
57 57
58 58 def __init__(self):
59 59 self.status = None
60 60 self.headers = None
61 61 self.content = []
62 62
63 63 def __call__(self, status, headers, exc_info=None):
64 64 # TODO(skreft): do something meaningful with the exc_info
65 65 exc_info = None # avoid dangling circular reference
66 66 self.status = status
67 67 self.headers = headers
68 68
69 69 return self.write
70 70
71 71 def write(self, content):
72 72 """Write method returned when this object is called.
73 73
74 74 All the data written is then available in self.content.
75 75 """
76 76 self.content.append(content)
77 77
78 78
79 79 class WSGIAppCaller:
80 80 """Calls a WSGI app."""
81 81
82 82 def __init__(self, app):
83 83 """
84 84 :param app: WSGI app to call
85 85 """
86 86 self.app = app
87 87
88 88 def handle(self, environ, input_data):
89 89 """Process a request with the WSGI app.
90 90
91 91 The returned data of the app is fully consumed into a list.
92 92
93 93 :param environ: WSGI environment to update
94 94 :type environ: dict
95 95 :param input_data: data to be read by the app
96 96 :type input_data: str/bytes
97 97
98 98 :returns: a tuple with the contents, status and headers
99 99 :rtype: (list<bytes>, str, list<(str, str)>)
100 100 """
101 101 _complete_environ(environ, ascii_bytes(input_data, allow_bytes=True))
102 102 start_response = _StartResponse()
103 103 log.debug("Calling wrapped WSGI application")
104 104 responses = self.app(environ, start_response)
105 105 responses_list = list(responses)
106 106 existing_responses = start_response.content
107 107 if existing_responses:
108 108 log.debug("Adding returned response to response written via write()")
109 109 existing_responses.extend(responses_list)
110 110 responses_list = existing_responses
111 111 if hasattr(responses, 'close'):
112 112 log.debug("Closing iterator from WSGI application")
113 113 responses.close()
114 114
115 115 log.debug("Handling of WSGI request done, returning response")
116 116 return responses_list, start_response.status, start_response.headers
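
As a usage note, handle() merges output produced via the legacy write() callable with the iterable the app returns, in that order, which is exactly what the demo_app test earlier in this diff asserts. A minimal round trip, using only names defined in this module:

from vcsserver import wsgi_app_caller

def tiny_app(environ, start_response):
    write = start_response('200 OK', [('Content-Type', 'text/plain')])
    write(b'written first\n')       # old-school write() output
    return [b'returned second\n']   # iterable output, appended afterwards

environ = {
    'REQUEST_METHOD': 'GET', 'SERVER_NAME': 'localhost', 'SERVER_PORT': '80',
    'SCRIPT_NAME': '', 'PATH_INFO': '/', 'QUERY_STRING': '',
}
responses, status, headers = wsgi_app_caller.WSGIAppCaller(tiny_app).handle(environ, b'')
assert b''.join(responses) == b'written first\nreturned second\n'
assert status == '200 OK'
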
1 NO CONTENT: file was removed