##// END OF EJS Templates
core: moved str_utils and type_utils to lib so it's consistent with ce, and easier to sync up codebases
core: moved str_utils and type_utils to lib so it's consistent with ce, and easier to sync up codebases

File last commit:

r1249:5745b11f default
r1249:5745b11f default
Show More
hooks.py
832 lines | 24.8 KiB | text/x-python | PythonLexer
# RhodeCode VCSServer provides access to different vcs backends via network.
# Copyright (C) 2014-2023 RhodeCode GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
import io
import os
import sys
import logging
import collections
import base64
import msgpack
import dataclasses
import pygit2
import http.client
from celery import Celery
import mercurial.scmutil
import mercurial.node
from vcsserver.lib.ext_json import json
from vcsserver import exceptions, subprocessio, settings
from vcsserver.lib.str_utils import ascii_str, safe_str
from vcsserver.remote.git_remote import Repository
celery_app = Celery('__vcsserver__')
log = logging.getLogger(__name__)
class HooksHttpClient:
proto = 'msgpack.v1'
connection = None
def __init__(self, hooks_uri):
self.hooks_uri = hooks_uri
def __repr__(self):
return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
def __call__(self, method, extras):
connection = http.client.HTTPConnection(self.hooks_uri)
# binary msgpack body
headers, body = self._serialize(method, extras)
log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
try:
try:
connection.request('POST', '/', body, headers)
except Exception as error:
log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
raise
response = connection.getresponse()
try:
return msgpack.load(response)
except Exception:
response_data = response.read()
log.exception('Failed to decode hook response json data. '
'response_code:%s, raw_data:%s',
response.status, response_data)
raise
finally:
connection.close()
@classmethod
def _serialize(cls, hook_name, extras):
data = {
'method': hook_name,
'extras': extras
}
headers = {
"rc-hooks-protocol": cls.proto,
"Connection": "keep-alive"
}
return headers, msgpack.packb(data)
class HooksCeleryClient:
TASK_TIMEOUT = 60 # time in seconds
def __init__(self, queue, backend):
celery_app.config_from_object({
'broker_url': queue, 'result_backend': backend,
'broker_connection_retry_on_startup': True,
'task_serializer': 'json',
'accept_content': ['json', 'msgpack'],
'result_serializer': 'json',
'result_accept_content': ['json', 'msgpack']
})
self.celery_app = celery_app
def __call__(self, method, extras):
inquired_task = self.celery_app.signature(
f'rhodecode.lib.celerylib.tasks.{method}'
)
return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
class HooksShadowRepoClient:
def __call__(self, hook_name, extras):
return {'output': '', 'status': 0}
class RemoteMessageWriter:
"""Writer base class."""
def write(self, message):
raise NotImplementedError()
class HgMessageWriter(RemoteMessageWriter):
"""Writer that knows how to send messages to mercurial clients."""
def __init__(self, ui):
self.ui = ui
def write(self, message: str):
# TODO: Check why the quiet flag is set by default.
old = self.ui.quiet
self.ui.quiet = False
self.ui.status(message.encode('utf-8'))
self.ui.quiet = old
class GitMessageWriter(RemoteMessageWriter):
"""Writer that knows how to send messages to git clients."""
def __init__(self, stdout=None):
self.stdout = stdout or sys.stdout
def write(self, message: str):
self.stdout.write(message)
class SvnMessageWriter(RemoteMessageWriter):
"""Writer that knows how to send messages to svn clients."""
def __init__(self, stderr=None):
# SVN needs data sent to stderr for back-to-client messaging
self.stderr = stderr or sys.stderr
def write(self, message):
self.stderr.write(message)
def _handle_exception(result):
exception_class = result.get('exception')
exception_traceback = result.get('exception_traceback')
log.debug('Handling hook-call exception: %s', exception_class)
if exception_traceback:
log.error('Got traceback from remote call:%s', exception_traceback)
if exception_class == 'HTTPLockedRC':
raise exceptions.RepositoryLockedException()(*result['exception_args'])
elif exception_class == 'HTTPBranchProtected':
raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
elif exception_class == 'RepositoryError':
raise exceptions.VcsException()(*result['exception_args'])
elif exception_class:
raise Exception(
f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
)
def _get_hooks_client(extras):
hooks_uri = extras.get('hooks_uri')
task_queue = extras.get('task_queue')
task_backend = extras.get('task_backend')
is_shadow_repo = extras.get('is_shadow_repo')
if hooks_uri:
return HooksHttpClient(hooks_uri)
elif task_queue and task_backend:
return HooksCeleryClient(task_queue, task_backend)
elif is_shadow_repo:
return HooksShadowRepoClient()
else:
raise Exception("Hooks client not found!")
def _call_hook(hook_name, extras, writer):
hooks_client = _get_hooks_client(extras)
log.debug('Hooks, using client:%s', hooks_client)
result = hooks_client(hook_name, extras)
log.debug('Hooks got result: %s', result)
_handle_exception(result)
writer.write(result['output'])
return result['status']
def _extras_from_ui(ui):
hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
if not hook_data:
# maybe it's inside environ ?
env_hook_data = os.environ.get('RC_SCM_DATA')
if env_hook_data:
hook_data = env_hook_data
extras = {}
if hook_data:
extras = json.loads(hook_data)
return extras
def _rev_range_hash(repo, node, check_heads=False):
from vcsserver.hgcompat import get_ctx
commits = []
revs = []
start = get_ctx(repo, node).rev()
end = len(repo)
for rev in range(start, end):
revs.append(rev)
ctx = get_ctx(repo, rev)
commit_id = ascii_str(mercurial.node.hex(ctx.node()))
branch = safe_str(ctx.branch())
commits.append((commit_id, branch))
parent_heads = []
if check_heads:
parent_heads = _check_heads(repo, start, end, revs)
return commits, parent_heads
def _check_heads(repo, start, end, commits):
from vcsserver.hgcompat import get_ctx
changelog = repo.changelog
parents = set()
for new_rev in commits:
for p in changelog.parentrevs(new_rev):
if p == mercurial.node.nullrev:
continue
if p < start:
parents.add(p)
for p in parents:
branch = get_ctx(repo, p).branch()
# The heads descending from that parent, on the same branch
parent_heads = {p}
reachable = {p}
for x in range(p + 1, end):
if get_ctx(repo, x).branch() != branch:
continue
for pp in changelog.parentrevs(x):
if pp in reachable:
reachable.add(x)
parent_heads.discard(pp)
parent_heads.add(x)
# More than one head? Suggest merging
if len(parent_heads) > 1:
return list(parent_heads)
return []
def _get_git_env():
env = {}
for k, v in os.environ.items():
if k.startswith('GIT'):
env[k] = v
# serialized version
return [(k, v) for k, v in env.items()]
def _get_hg_env(old_rev, new_rev, txnid, repo_path):
env = {}
for k, v in os.environ.items():
if k.startswith('HG'):
env[k] = v
env['HG_NODE'] = old_rev
env['HG_NODE_LAST'] = new_rev
env['HG_TXNID'] = txnid
env['HG_PENDING'] = repo_path
return [(k, v) for k, v in env.items()]
def _fix_hooks_executables(ini_path=''):
"""
This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns
especially for subversion where hooks strip entire env, and calling just 'svn' command will most likely fail
because svn is not on PATH
"""
from vcsserver.http_main import sanitize_settings_and_apply_defaults
from vcsserver.lib.config_utils import get_app_config_lightweight
core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
if ini_path:
ini_settings = get_app_config_lightweight(ini_path)
ini_settings = sanitize_settings_and_apply_defaults({'__file__': ini_path}, ini_settings)
core_binary_dir = ini_settings['core.binary_dir']
settings.BINARY_DIR = core_binary_dir
def repo_size(ui, repo, **kwargs):
extras = _extras_from_ui(ui)
return _call_hook('repo_size', extras, HgMessageWriter(ui))
def pre_pull(ui, repo, **kwargs):
extras = _extras_from_ui(ui)
return _call_hook('pre_pull', extras, HgMessageWriter(ui))
def pre_pull_ssh(ui, repo, **kwargs):
extras = _extras_from_ui(ui)
if extras and extras.get('SSH'):
return pre_pull(ui, repo, **kwargs)
return 0
def post_pull(ui, repo, **kwargs):
extras = _extras_from_ui(ui)
return _call_hook('post_pull', extras, HgMessageWriter(ui))
def post_pull_ssh(ui, repo, **kwargs):
extras = _extras_from_ui(ui)
if extras and extras.get('SSH'):
return post_pull(ui, repo, **kwargs)
return 0
def pre_push(ui, repo, node=None, **kwargs):
"""
Mercurial pre_push hook
"""
extras = _extras_from_ui(ui)
detect_force_push = extras.get('detect_force_push')
rev_data = []
hook_type: str = safe_str(kwargs.get('hooktype'))
if node and hook_type == 'pretxnchangegroup':
branches = collections.defaultdict(list)
commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
for commit_id, branch in commits:
branches[branch].append(commit_id)
for branch, commits in branches.items():
old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
rev_data.append({
'total_commits': len(commits),
'old_rev': old_rev,
'new_rev': commits[-1],
'ref': '',
'type': 'branch',
'name': branch,
})
for push_ref in rev_data:
push_ref['multiple_heads'] = _heads
repo_path = os.path.join(
extras.get('repo_store', ''), extras.get('repository', ''))
push_ref['hg_env'] = _get_hg_env(
old_rev=push_ref['old_rev'],
new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
repo_path=repo_path)
extras['hook_type'] = hook_type or 'pre_push'
extras['commit_ids'] = rev_data
return _call_hook('pre_push', extras, HgMessageWriter(ui))
def pre_push_ssh(ui, repo, node=None, **kwargs):
extras = _extras_from_ui(ui)
if extras.get('SSH'):
return pre_push(ui, repo, node, **kwargs)
return 0
def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
"""
Mercurial pre_push hook for SSH
"""
extras = _extras_from_ui(ui)
if extras.get('SSH'):
permission = extras['SSH_PERMISSIONS']
if 'repository.write' == permission or 'repository.admin' == permission:
return 0
# non-zero ret code
return 1
return 0
def post_push(ui, repo, node, **kwargs):
"""
Mercurial post_push hook
"""
extras = _extras_from_ui(ui)
commit_ids = []
branches = []
bookmarks = []
tags = []
hook_type: str = safe_str(kwargs.get('hooktype'))
commits, _heads = _rev_range_hash(repo, node)
for commit_id, branch in commits:
commit_ids.append(commit_id)
if branch not in branches:
branches.append(branch)
if hasattr(ui, '_rc_pushkey_bookmarks'):
bookmarks = ui._rc_pushkey_bookmarks
extras['hook_type'] = hook_type or 'post_push'
extras['commit_ids'] = commit_ids
extras['new_refs'] = {
'branches': branches,
'bookmarks': bookmarks,
'tags': tags
}
return _call_hook('post_push', extras, HgMessageWriter(ui))
def post_push_ssh(ui, repo, node, **kwargs):
"""
Mercurial post_push hook for SSH
"""
if _extras_from_ui(ui).get('SSH'):
return post_push(ui, repo, node, **kwargs)
return 0
def key_push(ui, repo, **kwargs):
from vcsserver.hgcompat import get_ctx
if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
# store new bookmarks in our UI object propagated later to post_push
ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
return
# backward compat
log_pull_action = post_pull
# backward compat
log_push_action = post_push
def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
"""
Old hook name: keep here for backward compatibility.
This is only required when the installed git hooks are not upgraded.
"""
pass
def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
"""
Old hook name: keep here for backward compatibility.
This is only required when the installed git hooks are not upgraded.
"""
pass
@dataclasses.dataclass
class HookResponse:
status: int
output: str
def git_pre_pull(extras) -> HookResponse:
"""
Pre pull hook.
:param extras: dictionary containing the keys defined in simplevcs
:type extras: dict
:return: status code of the hook. 0 for success.
:rtype: int
"""
if 'pull' not in extras['hooks']:
return HookResponse(0, '')
stdout = io.StringIO()
try:
status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
except Exception as error:
log.exception('Failed to call pre_pull hook')
status_code = 128
stdout.write(f'ERROR: {error}\n')
return HookResponse(status_code, stdout.getvalue())
def git_post_pull(extras) -> HookResponse:
"""
Post pull hook.
:param extras: dictionary containing the keys defined in simplevcs
:type extras: dict
:return: status code of the hook. 0 for success.
:rtype: int
"""
if 'pull' not in extras['hooks']:
return HookResponse(0, '')
stdout = io.StringIO()
try:
status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
except Exception as error:
status = 128
stdout.write(f'ERROR: {error}\n')
return HookResponse(status, stdout.getvalue())
def _parse_git_ref_lines(revision_lines):
rev_data = []
for revision_line in revision_lines or []:
old_rev, new_rev, ref = revision_line.strip().split(' ')
ref_data = ref.split('/', 2)
if ref_data[1] in ('tags', 'heads'):
rev_data.append({
# NOTE(marcink):
# we're unable to tell total_commits for git at this point
# but we set the variable for consistency with GIT
'total_commits': -1,
'old_rev': old_rev,
'new_rev': new_rev,
'ref': ref,
'type': ref_data[1],
'name': ref_data[2],
})
return rev_data
def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
"""
Pre push hook.
:return: status code of the hook. 0 for success.
"""
extras = json.loads(env['RC_SCM_DATA'])
rev_data = _parse_git_ref_lines(revision_lines)
if 'push' not in extras['hooks']:
return 0
_fix_hooks_executables()
empty_commit_id = '0' * 40
detect_force_push = extras.get('detect_force_push')
for push_ref in rev_data:
# store our git-env which holds the temp store
push_ref['git_env'] = _get_git_env()
push_ref['pruned_sha'] = ''
if not detect_force_push:
# don't check for forced-push when we don't need to
continue
type_ = push_ref['type']
new_branch = push_ref['old_rev'] == empty_commit_id
delete_branch = push_ref['new_rev'] == empty_commit_id
if type_ == 'heads' and not (new_branch or delete_branch):
old_rev = push_ref['old_rev']
new_rev = push_ref['new_rev']
cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
stdout, stderr = subprocessio.run_command(
cmd, env=os.environ.copy())
# means we're having some non-reachable objects, this forced push was used
if stdout:
push_ref['pruned_sha'] = stdout.splitlines()
extras['hook_type'] = 'pre_receive'
extras['commit_ids'] = rev_data
stdout = sys.stdout
status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
return status_code
def git_post_receive(unused_repo_path, revision_lines, env) -> int:
"""
Post push hook.
:return: status code of the hook. 0 for success.
"""
extras = json.loads(env['RC_SCM_DATA'])
if 'push' not in extras['hooks']:
return 0
_fix_hooks_executables()
rev_data = _parse_git_ref_lines(revision_lines)
git_revs = []
# N.B.(skreft): it is ok to just call git, as git before calling a
# subcommand sets the PATH environment variable so that it point to the
# correct version of the git executable.
empty_commit_id = '0' * 40
branches = []
tags = []
for push_ref in rev_data:
type_ = push_ref['type']
if type_ == 'heads':
# starting new branch case
if push_ref['old_rev'] == empty_commit_id:
push_ref_name = push_ref['name']
if push_ref_name not in branches:
branches.append(push_ref_name)
need_head_set = ''
with Repository(os.getcwd()) as repo:
try:
repo.head
except pygit2.GitError:
need_head_set = f'refs/heads/{push_ref_name}'
if need_head_set:
repo.set_head(need_head_set)
print(f"Setting default branch to {push_ref_name}")
cmd = [settings.GIT_EXECUTABLE(), 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
stdout, stderr = subprocessio.run_command(
cmd, env=os.environ.copy())
heads = safe_str(stdout)
heads = heads.replace(push_ref['ref'], '')
heads = ' '.join(head for head
in heads.splitlines() if head) or '.'
cmd = [settings.GIT_EXECUTABLE(), 'log', '--reverse',
'--pretty=format:%H', '--', push_ref['new_rev'],
'--not', heads]
stdout, stderr = subprocessio.run_command(
cmd, env=os.environ.copy())
git_revs.extend(list(map(ascii_str, stdout.splitlines())))
# delete branch case
elif push_ref['new_rev'] == empty_commit_id:
git_revs.append(f'delete_branch=>{push_ref["name"]}')
else:
if push_ref['name'] not in branches:
branches.append(push_ref['name'])
cmd = [settings.GIT_EXECUTABLE(), 'log',
f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
'--reverse', '--pretty=format:%H']
stdout, stderr = subprocessio.run_command(
cmd, env=os.environ.copy())
# we get bytes from stdout, we need str to be consistent
log_revs = list(map(ascii_str, stdout.splitlines()))
git_revs.extend(log_revs)
# Pure pygit2 impl. but still 2-3x slower :/
# results = []
#
# with Repository(os.getcwd()) as repo:
# repo_new_rev = repo[push_ref['new_rev']]
# repo_old_rev = repo[push_ref['old_rev']]
# walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
#
# for commit in walker:
# if commit.id == repo_old_rev.id:
# break
# results.append(commit.id.hex)
# # reverse the order, can't use GIT_SORT_REVERSE
# log_revs = results[::-1]
elif type_ == 'tags':
if push_ref['name'] not in tags:
tags.append(push_ref['name'])
git_revs.append(f'tag=>{push_ref["name"]}')
extras['hook_type'] = 'post_receive'
extras['commit_ids'] = git_revs
extras['new_refs'] = {
'branches': branches,
'bookmarks': [],
'tags': tags,
}
stdout = sys.stdout
if 'repo_size' in extras['hooks']:
try:
_call_hook('repo_size', extras, GitMessageWriter(stdout))
except Exception:
pass
status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
return status_code
def _get_extras_from_txn_id(path, txn_id):
_fix_hooks_executables()
extras = {}
try:
cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
'-t', txn_id,
'--revprop', path, 'rc-scm-extras']
stdout, stderr = subprocessio.run_command(
cmd, env=os.environ.copy())
extras = json.loads(base64.urlsafe_b64decode(stdout))
except Exception:
log.exception('Failed to extract extras info from txn_id')
return extras
def _get_extras_from_commit_id(commit_id, path):
_fix_hooks_executables()
extras = {}
try:
cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
'-r', commit_id,
'--revprop', path, 'rc-scm-extras']
stdout, stderr = subprocessio.run_command(
cmd, env=os.environ.copy())
extras = json.loads(base64.urlsafe_b64decode(stdout))
except Exception:
log.exception('Failed to extract extras info from commit_id')
return extras
def svn_pre_commit(repo_path, commit_data, env):
path, txn_id = commit_data
branches = []
tags = []
if env.get('RC_SCM_DATA'):
extras = json.loads(env['RC_SCM_DATA'])
else:
# fallback method to read from TXN-ID stored data
extras = _get_extras_from_txn_id(path, txn_id)
if not extras:
#TODO: temporary fix until svn txn-id changes are merged
return 0
raise ValueError('Failed to extract context data called extras for hook execution')
extras['hook_type'] = 'pre_commit'
extras['commit_ids'] = [txn_id]
extras['txn_id'] = txn_id
extras['new_refs'] = {
'total_commits': 1,
'branches': branches,
'bookmarks': [],
'tags': tags,
}
return _call_hook('pre_push', extras, SvnMessageWriter())
def svn_post_commit(repo_path, commit_data, env):
"""
commit_data is path, rev, txn_id
"""
if len(commit_data) == 3:
path, commit_id, txn_id = commit_data
elif len(commit_data) == 2:
log.error('Failed to extract txn_id from commit_data using legacy method. '
'Some functionality might be limited')
path, commit_id = commit_data
txn_id = None
else:
return 0
branches = []
tags = []
if env.get('RC_SCM_DATA'):
extras = json.loads(env['RC_SCM_DATA'])
else:
# fallback method to read from TXN-ID stored data
extras = _get_extras_from_commit_id(commit_id, path)
if not extras:
#TODO: temporary fix until svn txn-id changes are merged
return 0
raise ValueError('Failed to extract context data called extras for hook execution')
extras['hook_type'] = 'post_commit'
extras['commit_ids'] = [commit_id]
extras['txn_id'] = txn_id
extras['new_refs'] = {
'branches': branches,
'bookmarks': [],
'tags': tags,
'total_commits': 1,
}
if 'repo_size' in extras['hooks']:
try:
_call_hook('repo_size', extras, SvnMessageWriter())
except Exception:
pass
return _call_hook('post_push', extras, SvnMessageWriter())