diff --git a/vcsserver/base.py b/vcsserver/base.py --- a/vcsserver/base.py +++ b/vcsserver/base.py @@ -16,10 +16,12 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import os import sys +import tempfile import traceback import logging import urllib.parse +from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store from vcsserver.lib.rc_cache import region_meta from vcsserver import exceptions @@ -84,6 +86,7 @@ def raise_from_original(new_type, org_ex del exc_traceback + class ArchiveNode(object): def __init__(self, path, mode, is_link, raw_bytes): self.path = path @@ -92,28 +95,51 @@ class ArchiveNode(object): self.raw_bytes = raw_bytes -def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id, write_metadata=True, extra_metadata=None): +def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name, + commit_id, write_metadata=True, extra_metadata=None, cache_config=None): """ + Function that generates an archive and stores it in a dedicated backend store. + In here we use diskcache + + :param node_walker: a generator returning nodes to add to archive + :param archive_key: key used to store the path + :param kind: archive kind + :param mtime: time of creation + :param archive_at_path: default '/', the path at which the archive was started.
if this is not '/' it means it's a partial archive + :param archive_dir_name: inside dir name when creating an archive + :param commit_id: commit sha of revision archive was created at + :param write_metadata: + :param extra_metadata: + :param cache_config: + walker should be a file walker, for example: - def walker(): + def node_walker(): for file_info in files: yield ArchiveNode(fn, mode, is_link, ctx[fn].data) """ extra_metadata = extra_metadata or {} - archive_dest_path = safe_bytes(archive_dest_path) + + d_cache = get_archival_cache_store(config=cache_config) + + if archive_key in d_cache: + with d_cache as d_cache_reader: + reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True) + return reader.name + + archive_tmp_path = safe_bytes(tempfile.mkstemp()[1]) + log.debug('Creating new temp archive in %s', archive_tmp_path) if kind == "tgz": - archiver = archival.tarit(archive_dest_path, mtime, b"gz") + archiver = archival.tarit(archive_tmp_path, mtime, b"gz") elif kind == "tbz2": - archiver = archival.tarit(archive_dest_path, mtime, b"bz2") + archiver = archival.tarit(archive_tmp_path, mtime, b"bz2") elif kind == 'zip': - archiver = archival.zipit(archive_dest_path, mtime) + archiver = archival.zipit(archive_tmp_path, mtime) else: raise exceptions.ArchiveException()( f'Remote does not support: "{kind}" archive type.') - for f in walker(commit_id, archive_at_path): + for f in node_walker(commit_id, archive_at_path): f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/')) try: archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes()) @@ -133,46 +159,37 @@ def archive_repo(walker, archive_dest_pa f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt') archiver.addfile(f_path, 0o644, False, b'\n'.join(meta)) - return archiver.done() + archiver.done() + + # ensure set & get are atomic + with d_cache.transact(): + + with open(archive_tmp_path, 'rb') as archive_file: + add_result = 
d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True) + if not add_result: + log.error('Failed to store cache for key=%s', archive_key) + + os.remove(archive_tmp_path) + + reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True) + if not reader: + raise AssertionError(f'empty reader on key={archive_key} added={add_result}') + + return reader.name class BinaryEnvelope(object): - def __init__(self, value: bytes, bin_type=True): - self.value = value - self.bin_type = bin_type - - def __len__(self): - return len(self.value) - - def __getitem__(self, index): - return self.value[index] - - def __iter__(self): - return iter(self.value) - - def __str__(self): - return str(self.value) - - def __repr__(self): - return repr(self.value) - - def __eq__(self, other): - if isinstance(other, BinaryEnvelope): - return self.value == other.value - return False - - def __ne__(self, other): - return not self.__eq__(other) - - def __add__(self, other): - if isinstance(other, BinaryEnvelope): - return BinaryEnvelope(self.value + other.value) - raise TypeError(f"unsupported operand type(s) for +: 'BinaryEnvelope' and '{type(other)}'") - - def __radd__(self, other): - if isinstance(other, BinaryEnvelope): - return BinaryEnvelope(other.value + self.value) - raise TypeError(f"unsupported operand type(s) for +: '{type(other)}' and 'BinaryEnvelope'") + def __init__(self, val): + self.val = val +class BytesEnvelope(bytes): + def __new__(cls, content): + if isinstance(content, bytes): + return super().__new__(cls, content) + else: + raise TypeError('Content must be bytes.') + +class BinaryBytesEnvelope(BytesEnvelope): + pass