pygrack.py
417 lines
| 15.3 KiB
| text/x-python
|
PythonLexer
/ vcsserver / pygrack.py
r0 | # RhodeCode VCSServer provides access to different vcs backends via network. | |||
r1126 | # Copyright (C) 2014-2023 RhodeCode GmbH | |||
r0 | # | |||
# This program is free software; you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation; either version 3 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
# along with this program; if not, write to the Free Software Foundation, | ||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
"""Handles the Git smart protocol.""" | ||||
import os | ||||
import socket | ||||
import logging | ||||
import dulwich.protocol | ||||
r1048 | from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K | |||
r0 | from webob import Request, Response, exc | |||
r1048 | from vcsserver.lib.rc_json import json | |||
r0 | from vcsserver import hooks, subprocessio | |||
r1060 | from vcsserver.str_utils import ascii_bytes | |||
r0 | ||||
log = logging.getLogger(__name__) | ||||
class FileWrapper:
    """File wrapper that caps how much data can be read from the wrapped object.

    At most ``content_length`` bytes are handed out in total, no matter how
    much the wrapped object could deliver.
    """

    def __init__(self, fd, content_length):
        """
        :param fd: object exposing ``read(size)``
        :param content_length: maximum number of bytes to hand out; ``None``
            is treated as "nothing to read"
        """
        self.fd = fd
        self.content_length = content_length
        # Number of bytes that may still be handed out.
        self.remain = content_length or 0

    def read(self, size):
        """
        Read up to ``size`` bytes without exceeding the remaining budget.

        :param size: maximum number of bytes wanted; ``None`` or a negative
            value means "everything that is left"
        :return: the data read, or ``None`` once the budget is exhausted
        :raises IOError: when the underlying read fails with a socket error
        """
        if not self.remain:
            return None

        # Never ask the wrapped object for more than we are allowed to serve.
        if size is None or size < 0 or size > self.remain:
            size = self.remain

        try:
            data = self.fd.read(size)
        except socket.error as err:
            raise IOError(self) from err

        if data:
            # Account for what was actually delivered: a short read must not
            # eat into the budget of bytes still to come.
            self.remain -= len(data)
        else:
            # The wrapped object ran dry early; nothing more will arrive.
            self.remain = 0
        return data

    def __repr__(self):
        consumed = (self.content_length or 0) - self.remain
        return f'<FileWrapper {self.fd} len: {self.content_length}, read: {consumed}>'
class GitRepository:
    """WSGI app for handling Git smart protocol endpoints."""

    # Lower-cased directory entries that must all be present in a repository path.
    git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
    # The only service commands this app is willing to run.
    commands = frozenset(('git-upload-pack', 'git-receive-pack'))
    valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)

    # The last bytes are the SHA1 of the first 12 bytes.
    EMPTY_PACK = (
        b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
        b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
    )
    FLUSH_PACKET = b"0000"

    SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))

    def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
        """
        :param repo_name: repository name, used to strip the prefix from request paths
        :param content_path: filesystem path of the git directory
        :param git_path: git executable to run
        :param update_server_info: when truthy, run ``update-server-info`` after each push
        :param extras: data serialized to JSON into the ``RC_SCM_DATA`` environment variable
        :raises OSError: when ``content_path`` does not contain the git folder signature
        """
        files = frozenset(f.lower() for f in os.listdir(content_path))
        valid_dir_signature = self.git_folder_signature.issubset(files)

        if not valid_dir_signature:
            raise OSError(f'{content_path} missing git signature')

        self.content_path = content_path
        self.repo_name = repo_name
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def _get_fixedpath(self, path):
        """
        Small fix for repo_path

        Drops everything up to and including the first occurrence of the
        repository name, then an optional ``.git`` prefix, then surrounding
        slashes.

        :param path: request path info
        """
        path = path.split(self.repo_name, 1)[-1]
        if path.startswith('.git'):
            # for bare repos we still get the .git prefix inside, we skip it
            # here, and remove from the service command
            path = path[4:]

        return path.strip('/')

    def inforefs(self, request, unused_environ):
        """
        WSGI Response producer for HTTP GET Git Smart
        HTTP /info/refs request.
        """

        git_command = request.GET.get('service')
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # please, resist the urge to add '\n' to git capture and increment
        # line count by 1.
        # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
        # a part of protocol.
        # The code in Git client not only does NOT need '\n', but actually
        # blows up if you sprinkle "flush" (0000) as "0001\n".
        # It reads binary, per number of bytes specified.
        # if you do add '\n' as part of data, count it.
        server_advert = f'# service={git_command}\n'
        # pkt-line length prefix: 4 lower-case hex digits counting the prefix itself
        packet_len = f'{len(server_advert) + 4:04x}'
        try:
            gitenv = dict(os.environ)
            # forget all configs
            gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
            # git_command[4:] strips the leading "git-" to get the sub-command
            command = [self.git_path, git_command[4:], '--stateless-rpc',
                       '--advertise-refs', self.content_path]
            out = subprocessio.SubprocessIOChunker(
                command,
                env=gitenv,
                starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
                shell=False
            )
        except OSError:
            log.exception('Error processing command')
            raise exc.HTTPExpectationFailed()

        resp = Response()
        resp.content_type = f'application/x-{git_command}-advertisement'
        resp.charset = None
        resp.app_iter = out

        return resp

    def _get_want_capabilities(self, request):
        """Read the capabilities found in the first want line of the request."""
        body = request.body_file_seekable
        # Peek at the first line only; rewind so the body can be read again later.
        pos = body.tell()
        first_line = body.readline()
        body.seek(pos)

        return frozenset(
            dulwich.protocol.extract_want_line_capabilities(first_line)[1])

    def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
        """
        Construct a response with an empty PACK file.

        We use an empty PACK file, as that would trigger the failure of the pull
        or clone command.

        We also print in the error output a message explaining why the command
        was aborted.

        If additionally, the user is accepting messages we send them the output
        of the pre-pull hook.

        Note that for clients not supporting side-band we just send them the
        empty PACK file.
        """

        if self.SIDE_BAND_CAPS.intersection(capabilities):
            response = []
            proto = dulwich.protocol.Protocol(None, response.append)
            proto.write_pkt_line(dulwich.protocol.NAK_LINE)

            self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
            # N.B.(skreft): Do not change the sideband channel to 3, as that
            # produces a fatal error in the client:
            #   fatal: error in sideband demultiplexer
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
                ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
                ascii_bytes(self.EMPTY_PACK, allow_bytes=True))

            # writes b"0000" as default
            proto.write_pkt_line(None)

            return response
        else:
            return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]

    def _build_post_pull_response(self, response, capabilities, start_message, end_message):
        """
        Given a list response we inject the post-pull messages.

        We only inject the messages if the client supports sideband, and the
        response has the format:
            0008NAK\n...0000

        Note that we do not check the no-progress capability as by default, git
        sends it, which effectively would block all messages.

        :param response: iterator over the chunks produced by the git command
        :param capabilities: capabilities announced by the client
        :param start_message: data injected right after the leading NAK packet
        :param end_message: data injected right before the trailing flush packet
        :return: ``response`` untouched when nothing has to be injected,
            otherwise a generator yielding the altered chunks
        :raises TypeError: when ``response`` is a list/tuple or not iterable
        """
        if not self.SIDE_BAND_CAPS.intersection(capabilities):
            return response

        if not start_message and not end_message:
            return response

        if isinstance(response, (list, tuple)):
            raise TypeError(f'response must be an iterator: got {type(response)}')

        try:
            # Normalise to a real iterator so objects that only implement
            # __iter__ work as well.
            chunks = iter(response)
        except TypeError:
            raise TypeError(f'response must be an iterator: got {type(response)}') from None

        nak_packet = b'0008NAK\n'
        flush_packet = self.FLUSH_PACKET

        def injected_response():
            # Chunks are emitted one step behind the source, so the final
            # chunk is known when it is time to alter it. An empty source
            # simply yields nothing.
            header_injected = False
            pending = None

            for chunk in chunks:
                # alter the chunk if it carries the initial NAK packet
                if not header_injected and chunk.startswith(nak_packet):
                    parts = [nak_packet]
                    parts.extend(self._get_messages(start_message, capabilities))
                    parts.append(chunk[len(nak_packet):])
                    chunk = b''.join(parts)
                    header_injected = True

                if pending is not None:
                    yield pending
                pending = chunk

            if pending is None:
                return

            # last chunk: squeeze the end message in front of the flush packet
            if pending.endswith(flush_packet):
                parts = [pending[:-len(flush_packet)]]
                parts.extend(self._get_messages(end_message, capabilities))
                parts.append(flush_packet)
                pending = b''.join(parts)
            yield pending

        return injected_response()

    def _write_sideband_to_proto(self, proto, data, capabilities):
        """
        Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS

        We do not use dulwich's write_sideband directly as it only supports
        side-band-64k.

        Nothing is written when ``data`` is empty or when neither side-band
        capability is present in ``capabilities``.
        """
        if not data:
            return

        # N.B.(skreft): The values below are explained in the pack protocol
        # documentation, section Packfile Data.
        # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            chunk_size = 65515
        elif CAPABILITY_SIDE_BAND in capabilities:
            chunk_size = 995
        else:
            return

        chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))

        for chunk in chunker:
            proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))

    def _get_messages(self, data, capabilities):
        """Return a list with packets for sending data in sideband number 2."""
        response = []
        proto = dulwich.protocol.Protocol(None, response.append)

        self._write_sideband_to_proto(proto, data, capabilities)

        return response

    def backend(self, request, environ):
        """
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
        Reads commands and data from HTTP POST's body.
        returns an iterator obj with contents of git command's
        response to stdout
        """
        # TODO(skreft): think how we could detect an HTTPLockedException, as
        # we probably want to have the same mechanism used by mercurial and
        # simplevcs.
        # For that we would need to parse the output of the command looking for
        # some signs of the HTTPLockedError, parse the data and reraise it in
        # pygrack. However, that would interfere with the streaming.
        #
        # Now the output of a blocked push is:
        # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # POST git-receive-pack (1047 bytes)
        # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
        # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # ! [remote rejected] master -> master (pre-receive hook declined)
        # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'

        git_command = self._get_fixedpath(request.path_info)
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        capabilities = None
        if git_command == 'git-upload-pack':
            capabilities = self._get_want_capabilities(request)

        # Only cap the body when a usable length is known; a CONTENT_LENGTH
        # key without a parsed value would otherwise break FileWrapper.read().
        content_length = request.content_length
        if 'CONTENT_LENGTH' in environ and content_length is not None:
            inputstream = FileWrapper(request.body_file_seekable, content_length)
        else:
            inputstream = request.body_file_seekable

        resp = Response()
        resp.content_type = f'application/x-{git_command}-result'
        resp.charset = None

        pre_pull_messages = ''
        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_pre_pull(self.extras)
            if hook_response.status != 0:
                pre_pull_messages = hook_response.output
                resp.app_iter = self._build_failed_pre_pull_response(
                    capabilities, pre_pull_messages)
                return resp
            # NOTE(review): pre_pull_messages is only filled in on failure, so
            # the start message passed to _build_post_pull_response below is
            # always empty - confirm whether the output of a successful
            # pre-pull hook should be forwarded to the client.

        gitenv = dict(os.environ)
        # forget all configs
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
        # git_command[4:] strips the leading "git-" to get the sub-command
        cmd = [self.git_path, git_command[4:], '--stateless-rpc',
               self.content_path]
        log.debug('handling cmd %s', cmd)

        out = subprocessio.SubprocessIOChunker(
            cmd,
            input_stream=inputstream,
            env=gitenv,
            cwd=self.content_path,
            shell=False,
            fail_on_stderr=False,
            fail_on_return_code=False
        )

        if self.update_server_info and git_command == 'git-receive-pack':
            # We need to fully consume the iterator here, as the
            # update-server-info command needs to be run after the push.
            out = list(out)

            # Updating refs manually after each push.
            # This is required as some clients are exposing Git repos internally
            # with the dumb protocol.
            cmd = [self.git_path, 'update-server-info']
            log.debug('handling cmd %s', cmd)
            # NOTE(review): inputstream was already handed to the receive-pack
            # process above; presumably nothing is left to read from it -
            # confirm it is needed here.
            output = subprocessio.SubprocessIOChunker(
                cmd,
                input_stream=inputstream,
                env=gitenv,
                cwd=self.content_path,
                shell=False,
                fail_on_stderr=False,
                fail_on_return_code=False
            )
            # Consume all the output so the subprocess finishes
            for _ in output:
                pass

        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_post_pull(self.extras)
            post_pull_messages = hook_response.output
            resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
        else:
            resp.app_iter = out

        return resp

    def __call__(self, environ, start_response):
        """
        WSGI entry point: send ``info/refs`` requests to ``inforefs`` and
        everything else to ``backend``.

        HTTP exceptions raised by the handler are used as the response; any
        other exception is logged and turned into an internal server error.
        """
        request = Request(environ)
        _path = self._get_fixedpath(request.path_info)
        if _path.startswith('info/refs'):
            app = self.inforefs
        else:
            app = self.backend

        try:
            resp = app(request, environ)
        except exc.HTTPException as error:
            log.exception('HTTP Error')
            resp = error
        except Exception:
            log.exception('Unknown error')
            resp = exc.HTTPInternalServerError()

        return resp(environ, start_response)