|
|
# RhodeCode VCSServer provides access to different vcs backends via network.
|
|
|
# Copyright (C) 2014-2024 RhodeCode GmbH
|
|
|
#
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
# (at your option) any later version.
|
|
|
#
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
# GNU General Public License for more details.
|
|
|
#
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
|
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
|
"""Handles the Git smart protocol."""
|
|
|
|
|
|
import os
|
|
|
import socket
|
|
|
import logging
|
|
|
|
|
|
import dulwich.protocol
|
|
|
from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
|
|
|
from webob import Request, Response, exc
|
|
|
|
|
|
from vcsserver.lib.ext_json import json
|
|
|
from vcsserver import hooks, subprocessio
|
|
|
from vcsserver.lib.str_utils import ascii_bytes
|
|
|
|
|
|
|
|
|
# Module-level logger; its name is taken from this module's ``__name__``.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
class FileWrapper:
    """
    File wrapper that caps how much data can be read from the wrapped file.

    At most ``content_length`` bytes are handed out in total; once that budget
    is used up, :meth:`read` returns ``None``.
    """

    def __init__(self, fd, content_length):
        """
        :param fd: file-like object exposing ``read(size)``.
        :param content_length: maximum number of bytes that may be read from ``fd``.
        """
        self.fd = fd
        self.content_length = content_length
        # number of bytes that may still be handed out
        self.remain = content_length

    def read(self, size=-1):
        """
        Read up to ``size`` bytes without going over the remaining budget.

        :param size: maximum number of bytes to return; ``None`` or a negative
            value means "everything that is still allowed".
        :return: whatever ``fd.read`` returned, or ``None`` once the budget is
            exhausted.
        :raises IOError: when the underlying read fails with ``socket.error``;
            the original error is attached as ``__cause__``.
        """
        read_all = size is None or size < 0
        if read_all or size > self.remain:
            if not self.remain:
                # budget exhausted, signal end of data
                return None
            # never ask the underlying file for more than we are allowed to
            size = self.remain

        try:
            data = self.fd.read(size)
        except socket.error as error:
            raise IOError(self) from error

        # Account for what was actually delivered rather than what was asked
        # for, so a short read does not eat into the remaining budget.
        if data:
            self.remain -= len(data)
        return data

    def __repr__(self):
        consumed = self.content_length - self.remain
        return f'<FileWrapper {self.fd} len: {self.content_length}, read: {consumed}>'
|
|
|
|
|
|
|
|
|
class GitRepository:
    """
    WSGI app for handling Git smart protocol endpoints.

    Requests whose fixed path starts with ``info/refs`` are served by
    :meth:`inforefs`, every other request by :meth:`backend`. Both run the git
    binary given as ``git_path`` against ``content_path``.
    """

    # lower-cased directory entries that must all be present in `content_path`
    git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
    # the only service commands this app is willing to run
    commands = frozenset(('git-upload-pack', 'git-receive-pack'))
    valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)

    # The last bytes are the SHA1 of the first 12 bytes.
    EMPTY_PACK = (
        b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
        b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
    )
    FLUSH_PACKET = b"0000"

    SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))

    def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
        """
        :param repo_name: repository name, used to cut the repo prefix off request paths.
        :param content_path: directory of the git repository on disk.
        :param git_path: git executable used to build the commands.
        :param update_server_info: when truthy, ``update-server-info`` is run
            after every ``git-receive-pack``.
        :param extras: data serialized as JSON into the ``RC_SCM_DATA``
            environment variable and passed to the pull hooks.
        :raises OSError: when ``content_path`` does not contain all entries of
            ``git_folder_signature``.
        """
        files = frozenset(f.lower() for f in os.listdir(content_path))
        valid_dir_signature = self.git_folder_signature.issubset(files)

        if not valid_dir_signature:
            raise OSError(f'{content_path} missing git signature')

        self.content_path = content_path
        self.repo_name = repo_name
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def _get_fixedpath(self, path):
        """
        Small fix for repo_path

        Drops everything up to and including the first occurrence of the repo
        name, an optional leading ``.git`` and surrounding slashes.

        :param path: request path, e.g. ``request.path_info``.
        :return: the remaining path, e.g. ``info/refs`` or ``git-upload-pack``.
        """
        path = path.split(self.repo_name, 1)[-1]
        if path.startswith('.git'):
            # for bare repos we still get the .git prefix inside, we skip it
            # here, and remove from the service command
            path = path[4:]

        return path.strip('/')

    def inforefs(self, request, unused_environ):
        """
        WSGI Response producer for HTTP GET Git Smart
        HTTP /info/refs request.

        :param request: webob request; the ``service`` GET parameter selects the command.
        :param unused_environ: WSGI environ, not used here.
        :return: the response streaming the refs advertisement, or
            ``HTTPForbidden`` when the service is not one of ``commands``.
        :raises exc.HTTPExpectationFailed: when starting the git process
            fails with ``OSError``.
        """

        git_command = request.GET.get('service')
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # please, resist the urge to add '\n' to git capture and increment
        # line count by 1.
        # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
        # a part of protocol.
        # The code in Git client not only does NOT need '\n', but actually
        # blows up if you sprinkle "flush" (0000) as "0001\n".
        # It reads binary, per number of bytes specified.
        # if you do add '\n' as part of data, count it.
        server_advert = f'# service={git_command}\n'
        # pkt-line length prefix: 4 lower-case hex digits, counting the prefix itself
        packet_len = f'{len(server_advert) + 4:04x}'
        try:
            gitenv = dict(os.environ)
            # expose the extras to the git process as JSON
            gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
            # git_command[4:] strips the leading "git-" to get the sub-command
            command = [self.git_path, git_command[4:], '--stateless-rpc',
                       '--advertise-refs', self.content_path]
            out = subprocessio.SubprocessIOChunker(
                command,
                env=gitenv,
                starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
                shell=False
            )
        except OSError:
            log.exception('Error processing command')
            raise exc.HTTPExpectationFailed()

        resp = Response()
        resp.content_type = f'application/x-{git_command}-advertisement'
        resp.charset = None
        resp.app_iter = out

        return resp

    def _get_want_capabilities(self, request):
        """
        Read the capabilities found in the first want line of the request.

        The body position is restored afterwards, so the body can still be
        streamed to git from where it was.

        :param request: webob request carrying the git request body.
        :return: frozenset with the capabilities of the first body line.
        """
        body = request.body_file_seekable
        pos = body.tell()
        first_line = body.readline()
        body.seek(pos)

        return frozenset(
            dulwich.protocol.extract_want_line_capabilities(first_line)[1])

    def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
        """
        Construct a response with an empty PACK file.

        We use an empty PACK file, as that would trigger the failure of the pull
        or clone command.

        We also print in the error output a message explaining why the command
        was aborted.

        If additionally, the user is accepting messages we send them the output
        of the pre-pull hook.

        Note that for clients not supporting side-band we just send them the
        empty PACK file.

        :param capabilities: capabilities announced by the client.
        :param pre_pull_messages: output of the failed pre-pull hook.
        :return: list of byte chunks forming the response body.
        """

        if self.SIDE_BAND_CAPS.intersection(capabilities):
            response = []
            proto = dulwich.protocol.Protocol(None, response.append)
            proto.write_pkt_line(dulwich.protocol.NAK_LINE)

            self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
            # N.B.(skreft): Do not change the sideband channel to 3, as that
            # produces a fatal error in the client:
            #  fatal: error in sideband demultiplexer
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
                ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
                ascii_bytes(self.EMPTY_PACK, allow_bytes=True))

            # writes b"0000" as default
            proto.write_pkt_line(None)

            return response
        else:
            return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]

    def _build_post_pull_response(self, response, capabilities, start_message, end_message):
        """
        Given an iterator response we inject the post-pull messages.

        We only inject the messages if the client supports sideband, and the
        response has the format:
            0008NAK\\n...0000

        Note that we do not check the no-progress capability as by default, git
        sends it, which effectively would block all messages.

        :param response: iterator over the byte chunks produced by git.
        :param capabilities: capabilities announced by the client.
        :param start_message: text injected right after the ``0008NAK`` header.
        :param end_message: text injected right before the final flush packet.
        :return: ``response`` itself when nothing has to be injected, otherwise
            a generator yielding the altered chunks.
        :raises TypeError: when ``response`` is not iterable, or is a list or tuple.
        """

        if not self.SIDE_BAND_CAPS.intersection(capabilities):
            return response

        if not start_message and not end_message:
            return response

        try:
            iter(response)
            # iterator probably will work, we continue
        except TypeError:
            raise TypeError(f'response must be an iterator: got {type(response)}')
        if isinstance(response, (list, tuple)):
            raise TypeError(f'response must be an iterator: got {type(response)}')

        nak_header = b'0008NAK\n'
        flush_packet = self.FLUSH_PACKET

        def injected_response():
            header_injected = False
            # We stay one chunk behind the source: a chunk is only yielded once
            # we know whether it is the last one, as only the last chunk gets
            # the end message injected.
            has_pending = False
            pending = b''

            for chunk in response:
                if has_pending:
                    yield pending

                # alter the chunk if it carries the NAK header and the start
                # message was not injected yet
                if not header_injected and chunk.startswith(nak_header):
                    parts = [nak_header]
                    parts.extend(self._get_messages(start_message, capabilities))
                    parts.append(chunk[len(nak_header):])
                    chunk = b''.join(parts)
                    header_injected = True

                pending = chunk
                has_pending = True

            # an empty source yields nothing at all
            if not has_pending:
                return

            # last item ! alter it now
            if pending.endswith(flush_packet):
                parts = [pending[:-len(flush_packet)]]
                parts.extend(self._get_messages(end_message, capabilities))
                parts.append(flush_packet)
                pending = b''.join(parts)

            yield pending

        return injected_response()

    def _write_sideband_to_proto(self, proto, data, capabilities):
        """
        Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS

        We do not use dulwich's write_sideband directly as it only supports
        side-band-64k.

        :param proto: protocol object exposing ``write_sideband``.
        :param data: message to send; nothing is written when it is empty.
        :param capabilities: capabilities announced by the client; nothing is
            written when no side-band capability is among them.
        """
        if not data:
            return

        # N.B.(skreft): The values below are explained in the pack protocol
        # documentation, section Packfile Data.
        # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            chunk_size = 65515
        elif CAPABILITY_SIDE_BAND in capabilities:
            chunk_size = 995
        else:
            return

        for start in range(0, len(data), chunk_size):
            chunk = data[start:start + chunk_size]
            proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))

    def _get_messages(self, data, capabilities):
        """
        Return a list with packets for sending data in sideband number 2.

        :param data: message to send.
        :param capabilities: capabilities announced by the client.
        :return: list of packets, empty when nothing was written.
        """
        response = []
        proto = dulwich.protocol.Protocol(None, response.append)

        self._write_sideband_to_proto(proto, data, capabilities)

        return response

    def backend(self, request, environ):
        """
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
        Reads commands and data from HTTP POST's body.
        returns an iterator obj with contents of git command's
        response to stdout

        :param request: webob request; its fixed path selects the git command.
        :param environ: WSGI environ, checked for ``CONTENT_LENGTH``.
        :return: the response streaming git's output, or ``HTTPForbidden``
            when the command is not one of ``commands``.
        """
        # TODO(skreft): think how we could detect an HTTPLockedException, as
        # we probably want to have the same mechanism used by mercurial and
        # simplevcs.
        # For that we would need to parse the output of the command looking for
        # some signs of the HTTPLockedError, parse the data and reraise it in
        # pygrack. However, that would interfere with the streaming.
        #
        # Now the output of a blocked push is:
        # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # POST git-receive-pack (1047 bytes)
        # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
        # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # ! [remote rejected] master -> master (pre-receive hook declined)
        # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'

        git_command = self._get_fixedpath(request.path_info)
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # capabilities are only read (and only needed) for pulls
        capabilities = None
        if git_command == 'git-upload-pack':
            capabilities = self._get_want_capabilities(request)

        if 'CONTENT_LENGTH' in environ:
            # cap the amount of data git may read from the body
            inputstream = FileWrapper(request.body_file_seekable,
                                      request.content_length)
        else:
            inputstream = request.body_file_seekable

        resp = Response()
        resp.content_type = f'application/x-{git_command}-result'
        resp.charset = None

        pre_pull_messages = ''
        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_pre_pull(self.extras)
            if hook_response.status != 0:
                # hook failed: answer with an empty pack and skip git entirely
                pre_pull_messages = hook_response.output
                resp.app_iter = self._build_failed_pre_pull_response(
                    capabilities, pre_pull_messages)
                return resp

        gitenv = dict(os.environ)
        # forget all configs
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
        # git_command[4:] strips the leading "git-" to get the sub-command
        cmd = [self.git_path, git_command[4:], '--stateless-rpc',
               self.content_path]
        log.debug('handling cmd %s', cmd)

        out = subprocessio.SubprocessIOChunker(
            cmd,
            input_stream=inputstream,
            env=gitenv,
            cwd=self.content_path,
            shell=False,
            fail_on_stderr=False,
            fail_on_return_code=False
        )

        if self.update_server_info and git_command == 'git-receive-pack':
            # We need to fully consume the iterator here, as the
            # update-server-info command needs to be run after the push.
            out = list(out)

            # Updating refs manually after each push.
            # This is required as some clients are exposing Git repos internally
            # with the dumb protocol.
            cmd = [self.git_path, 'update-server-info']
            log.debug('handling cmd %s', cmd)
            output = subprocessio.SubprocessIOChunker(
                cmd,
                input_stream=inputstream,
                env=gitenv,
                cwd=self.content_path,
                shell=False,
                fail_on_stderr=False,
                fail_on_return_code=False
            )
            # Consume all the output so the subprocess finishes
            for _ in output:
                pass

        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_post_pull(self.extras)
            post_pull_messages = hook_response.output
            # NOTE: pre_pull_messages is still '' here, it is only filled when
            # the pre-pull hook fails, and in that case we returned above.
            resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
        else:
            resp.app_iter = out

        return resp

    def __call__(self, environ, start_response):
        """
        WSGI entry point: dispatch to :meth:`inforefs` or :meth:`backend`.

        HTTP exceptions raised by the handlers are used as the response, any
        other exception is logged and turned into a 500 response.
        """
        request = Request(environ)
        _path = self._get_fixedpath(request.path_info)
        if _path.startswith('info/refs'):
            app = self.inforefs
        else:
            app = self.backend

        try:
            resp = app(request, environ)
        except exc.HTTPException as error:
            log.exception('HTTP Error')
            resp = error
        except Exception:
            log.exception('Unknown error')
            resp = exc.HTTPInternalServerError()

        return resp(environ, start_response)
|
|
|
|