class ArchiveNode(object):
    """
    Description of a single entry to be written into an archive.

    :param path: path of the entry; `archive_repo` strips leading slashes and
        places it under the archive directory name
    :param mode: file mode handed to the archiver
    :param is_link: True when the entry is a symbolic link
    :param raw_bytes: zero-argument callable returning the entry content.
        It is called only when the entry is written and may raise
        NoContentException to signal an entry without content.
    """

    def __init__(self, path, mode, is_link, raw_bytes):
        self.path = path
        self.mode = mode
        self.is_link = is_link
        self.raw_bytes = raw_bytes


def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
    """
    Write the nodes produced by `walker` into an archive at `archive_dest_path`.

    walker should be a file walker, for example:
    def walker():
        for file_info in files:
            yield ArchiveNode(fn, mode, is_link, ctx[fn].data)

    :param walker: callable invoked as ``walker(commit_id, archive_at_path)``
        that yields ArchiveNode objects
    :param archive_dest_path: destination passed to the archiver
    :param kind: archive type, one of "tgz", "tbz2" or "zip"
    :param mtime: modification time passed to the archiver and stored in the
        metadata file
    :param archive_at_path: path inside the repository handed to the walker
    :param archive_dir_name: directory prefix used for every archive entry
    :param commit_id: commit handed to the walker and stored in the metadata file
    :param write_metadata: when true, add a `.archival.txt` entry with
        ``key:value`` lines
    :param extra_metadata: optional dict of additional metadata entries; they
        override the default keys on collision
    :return: the result of ``archiver.done()``
    :raises: the exception built by ``exceptions.ArchiveException`` when `kind`
        is not supported
    """
    extra_metadata = extra_metadata or {}

    if kind == "tgz":
        archiver = archival.tarit(archive_dest_path, mtime, "gz")
    elif kind == "tbz2":
        archiver = archival.tarit(archive_dest_path, mtime, "bz2")
    elif kind == 'zip':
        archiver = archival.zipit(archive_dest_path, mtime)
    else:
        raise exceptions.ArchiveException()(
            'Remote does not support: "%s" archive type.' % kind)

    for f in walker(commit_id, archive_at_path):
        f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
        # only reading the content may signal "no content", so keep the
        # archiver call itself outside of the guarded section
        try:
            content = f.raw_bytes()
        except NoContentException:
            # NOTE(marcink): this is a special case for SVN so we can create "empty"
            # directories which aren't supported by archiver
            archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
        else:
            archiver.addfile(f_path, f.mode, f.is_link, content)

    if write_metadata:
        metadata = {
            'commit_id': commit_id,
            'mtime': mtime,
        }
        metadata.update(extra_metadata)

        meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
        f_path = os.path.join(archive_dir_name, '.archival.txt')
        archiver.addfile(f_path, 0o644, False, '\n'.join(meta))

    return archiver.done()
@reraise_safe_exceptions
def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id):
    """
    Archive the tree of a git commit using the shared `archive_repo` helper.

    :param wire: connection data used to obtain the libgit2 repository
    :param archive_dest_path: destination passed to the archiver
    :param kind: archive type, forwarded unchanged
    :param mtime: modification time, forwarded unchanged
    :param archive_at_path: sub-path of the commit tree to archive;
        '' or '/' selects the whole tree
    :param archive_dir_name: directory prefix for archive entries
    :param commit_id: id of the commit to archive
    :return: whatever the shared `archive_repo` helper returns
    """

    def file_walker(_commit_id, path):
        """Yield an ArchiveNode for every index entry below `path`."""
        repo_init = self._factory.repo_libgit2(wire)

        with repo_init as repo:
            commit = repo[_commit_id]

            if path in ['', '/']:
                tree = commit.tree
            else:
                tree = commit.tree[path.rstrip('/')]
            tree_id = tree.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing('No tree with id: {}'.format(tree_id))

            # named `tree_index` so the module-level dulwich `index` import
            # is not shadowed inside this function
            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                mode = entry.mode
                is_link = stat.S_ISLNK(mode)
                # content is passed as a callable and read only when the
                # entry is written to the archive
                yield ArchiveNode(entry.path, mode, is_link, repo[entry.id].read_raw)

    return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
                        archive_dir_name, commit_id)
@reraise_safe_exceptions
def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id):
    """
    Archive the files of a Mercurial commit using the shared `archive_repo` helper.

    :param wire: connection data used to obtain the repository
    :param archive_dest_path: destination passed to the archiver
    :param kind: archive type, forwarded unchanged
    :param mtime: modification time, forwarded unchanged
    :param archive_at_path: sub-path of the manifest to archive;
        '' or '/' selects every file
    :param archive_dir_name: directory prefix for archive entries
    :param commit_id: id of the commit to archive
    :return: whatever the shared `archive_repo` helper returns
    """

    def file_walker(_commit_id, path):
        """Yield an ArchiveNode for every manifest file matching `path`."""
        repo = self._factory.repo(wire)
        ctx = repo[_commit_id]

        if path in ['', '/']:
            matcher = alwaysmatcher(badfn=None)
        else:
            # drop a trailing slash so the glob does not become "dir//**";
            # NOTE(review): assumes such a glob matches nothing - confirm
            sub_path = path.rstrip('/')
            matcher = patternmatcher('', [(b'glob', sub_path + '/**', b'')], badfn=None)

        for fn in ctx.manifest().walk(matcher):
            flags = ctx.flags(fn)
            mode = 0o755 if b'x' in flags else 0o644
            is_link = b'l' in flags

            # content is passed as a callable and read only when the
            # entry is written to the archive
            yield ArchiveNode(fn, mode, is_link, ctx[fn].data)

    return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
                        archive_dir_name, commit_id)
@reraise_safe_exceptions
def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id):
    """
    Archive a Subversion revision using the shared `archive_repo` helper.

    :param wire: connection data used to obtain the repository
    :param archive_dest_path: destination passed to the archiver
    :param kind: archive type, forwarded unchanged
    :param mtime: modification time, forwarded unchanged
    :param archive_at_path: directory inside the revision where the walk starts
    :param archive_dir_name: directory prefix for archive entries
    :param commit_id: revision to archive; converted with ``int()``
    :return: whatever the shared `archive_repo` helper returns
    """

    def walk_tree(root, root_dir):
        """
        Special recursive svn repo walker

        Yields ``(path, data, type)`` tuples; a directory is yielded first,
        followed by all entries below it.
        """
        filemode_default = 0o100644
        filemode_executable = 0o100755

        file_iter = svn.fs.dir_entries(root, root_dir)
        for f_name in file_iter:
            f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
            entry_path = os.path.join(root_dir, f_name)

            if f_type == 'dir':
                # return only DIR, and then all entries in that dir
                yield entry_path, {'mode': filemode_default}, f_type
                for nested_entry in walk_tree(root, entry_path):
                    yield nested_entry
            else:
                f_path = entry_path.rstrip('/')
                prop_list = svn.fs.node_proplist(root, f_path)

                f_mode = filemode_default
                if prop_list.get('svn:executable'):
                    f_mode = filemode_executable

                f_is_link = bool(prop_list.get('svn:special'))

                data = {
                    'is_link': f_is_link,
                    'mode': f_mode,
                    # bound `read` method, called only when the entry is written
                    'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
                }

                yield f_path, data, f_type

    def file_walker(_commit_id, path):
        """Yield an ArchiveNode for every directory and file below `path`."""
        repo = self._factory.repo(wire)
        root = svn.fs.revision_root(svn.repos.fs(repo), int(_commit_id))

        def no_content():
            # directories carry no content; this is signalled to the caller
            # of `raw_bytes` by raising NoContentException
            raise NoContentException()

        for f_name, f_data, f_type in walk_tree(root, path):
            mode = f_data['mode']
            if f_type == 'dir':
                yield ArchiveNode(f_name, mode, False, no_content)
            else:
                yield ArchiveNode(f_name, mode, f_data['is_link'], f_data['content_stream'])

    return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
                        archive_dir_name, commit_id)