# RhodeCode VCSServer provides access to different vcs backends via network.
# Copyright (C) 2014-2023 RhodeCode GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

"""Handles the Git smart protocol."""

import os
import socket
import logging

import dulwich.protocol
from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
from webob import Request, Response, exc

from vcsserver.lib.ext_json import json
from vcsserver import hooks, subprocessio
from vcsserver.str_utils import ascii_bytes

log = logging.getLogger(__name__)


class FileWrapper:
    """File wrapper that caps how much data is read from the wrapped file."""

    def __init__(self, fd, content_length):
        """
        :param fd: file-like object exposing ``read(size)``
        :param content_length: total number of bytes allowed to be read
        """
        self.fd = fd
        self.content_length = content_length
        # number of bytes that may still be handed out by ``read``
        self.remain = content_length

    def read(self, size):
        """
        Read up to ``size`` bytes, never going past ``content_length``.

        :param size: number of bytes requested
        :return: the data read from the wrapped file, or ``None`` once the
            whole ``content_length`` budget has been consumed
        :raises IOError: when the wrapped file raises ``socket.error`` while
            serving a request that fits in the remaining budget
        """
        if size <= self.remain:
            try:
                data = self.fd.read(size)
            except socket.error as err:
                raise IOError(self) from err
            self.remain -= size
        elif self.remain:
            # the request is bigger than what is left, hand out the rest only
            data = self.fd.read(self.remain)
            self.remain = 0
        else:
            data = None
        return data

    def __repr__(self):
        """Describe the wrapped file, its declared length and bytes consumed."""
        return '<FileWrapper {} len: {}, read: {}>'.format(
            self.fd, self.content_length, self.content_length - self.remain
        )


class GitRepository:
    """WSGI app for handling Git smart protocol endpoints."""

    git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
    commands = frozenset(('git-upload-pack', 'git-receive-pack'))
    valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)

    # The last bytes are the SHA1 of the first 12 bytes.
    EMPTY_PACK = (
        b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
        b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
    )
    FLUSH_PACKET = b"0000"

    SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))

    def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
        """
        :param repo_name: name of the repository, used to strip the request path
        :param content_path: filesystem path of the git directory
        :param git_path: path of the git executable
        :param update_server_info: when true, ``git update-server-info`` is
            run after each ``git-receive-pack``
        :param extras: data serialized as JSON into ``RC_SCM_DATA`` and passed
            to the pull hooks
        :raises OSError: when ``content_path`` does not look like a git folder
        """
        files = frozenset(f.lower() for f in os.listdir(content_path))
        valid_dir_signature = self.git_folder_signature.issubset(files)

        if not valid_dir_signature:
            raise OSError(f'{content_path} missing git signature')

        self.content_path = content_path
        self.repo_name = repo_name
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def _get_fixedpath(self, path):
        """
        Return the part of ``path`` that follows the repository name.

        :param path: request path, e.g. ``/<repo_name>/info/refs``
        :return: the remainder with an optional leading ``.git`` and any
            surrounding slashes removed
        """
        path = path.split(self.repo_name, 1)[-1]
        if path.startswith('.git'):
            # for bare repos we still get the .git prefix inside, we skip it
            # here, and remove from the service command
            path = path[4:]

        return path.strip('/')

    def inforefs(self, request, unused_environ):
        """
        WSGI Response producer for HTTP GET Git Smart
        HTTP /info/refs request.

        :param request: request whose ``service`` GET parameter names the
            git command
        :param unused_environ: WSGI environ, not used here
        :return: a response streaming the refs advertisement, or
            ``HTTPForbidden`` when the command is not allowed
        :raises exc.HTTPExpectationFailed: when the git process can't be
            started (``OSError``)
        """

        git_command = request.GET.get('service')
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # please, resist the urge to add '\n' to git capture and increment
        # line count by 1.
        # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
        # a part of protocol.
        # The code in Git client not only does NOT need '\n', but actually
        # blows up if you sprinkle "flush" (0000) as "0001\n".
        # It reads binary, per number of bytes specified.
        # if you do add '\n' as part of data, count it.
        server_advert = f'# service={git_command}\n'
        # pkt-line length prefix: 4 hex digits, counting the prefix itself
        packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
        try:
            gitenv = dict(os.environ)
            # forget all configs
            gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
            command = [self.git_path, git_command[4:], '--stateless-rpc',
                       '--advertise-refs', self.content_path]
            out = subprocessio.SubprocessIOChunker(
                command,
                env=gitenv,
                starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
                shell=False
            )
        except OSError:
            log.exception('Error processing command')
            raise exc.HTTPExpectationFailed()

        resp = Response()
        resp.content_type = f'application/x-{git_command}-advertisement'
        resp.charset = None
        resp.app_iter = out

        return resp

    def _get_want_capabilities(self, request):
        """Read the capabilities found in the first want line of the request."""
        # remember the position so the body can still be read from the start
        pos = request.body_file_seekable.tell()
        first_line = request.body_file_seekable.readline()
        request.body_file_seekable.seek(pos)

        return frozenset(
            dulwich.protocol.extract_want_line_capabilities(first_line)[1])

    def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
        """
        Construct a response with an empty PACK file.

        We use an empty PACK file, as that would trigger the failure of the pull
        or clone command.

        We also print in the error output a message explaining why the command
        was aborted.

        If additionally, the user is accepting messages we send them the output
        of the pre-pull hook.

        Note that for clients not supporting side-band we just send them the
        empty PACK file.

        :param capabilities: capabilities announced by the client
        :param pre_pull_messages: output of the failed pre-pull hook
        :return: list of byte chunks making up the response body
        """

        if self.SIDE_BAND_CAPS.intersection(capabilities):
            response = []
            proto = dulwich.protocol.Protocol(None, response.append)
            proto.write_pkt_line(dulwich.protocol.NAK_LINE)

            self._write_sideband_to_proto(
                proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
            # N.B.(skreft): Do not change the sideband channel to 3, as that
            # produces a fatal error in the client:
            #   fatal: error in sideband demultiplexer
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
                ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
                ascii_bytes(self.EMPTY_PACK, allow_bytes=True))

            # writes b"0000" as default
            proto.write_pkt_line(None)

            return response
        else:
            return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]

    def _build_post_pull_response(self, response, capabilities, start_message, end_message):
        """
        Given an iterator response we inject the post-pull messages.

        We only inject the messages if the client supports sideband, and the
        response has the format:
            0008NAK\n...0000

        Note that we do not check the no-progress capability as by default, git
        sends it, which effectively would block all messages.

        :param response: iterator over the byte chunks produced by git
        :param capabilities: capabilities announced by the client
        :param start_message: message injected right after the ``NAK`` line
        :param end_message: message injected before the final flush packet
        :return: ``response`` itself when nothing has to be injected,
            otherwise a generator yielding the altered chunks
        :raises TypeError: when ``response`` is not iterable, or is a list
            or a tuple
        """

        if not self.SIDE_BAND_CAPS.intersection(capabilities):
            return response

        if not start_message and not end_message:
            return response

        try:
            iter(response)
            # iterator probably will work, we continue
        except TypeError:
            raise TypeError(f'response must be an iterator: got {type(response)}')
        if isinstance(response, (list, tuple)):
            raise TypeError(f'response must be an iterator: got {type(response)}')

        def injected_response():
            # We stay one chunk behind the source, so that the last chunk is
            # known when it is emitted. An empty source yields nothing.
            header_injected = False
            has_pending = False
            pending = b''

            for chunk in response:
                if has_pending:
                    yield pending

                # alter the chunk if it carries the initial NAK line
                if not header_injected and chunk.startswith(b'0008NAK\n'):
                    new_response = [b'0008NAK\n']
                    new_response.extend(self._get_messages(start_message, capabilities))
                    new_response.append(chunk[8:])
                    chunk = b''.join(new_response)
                    header_injected = True

                pending = chunk
                has_pending = True

            if has_pending:
                # last item ! alter it now
                if pending.endswith(self.FLUSH_PACKET):
                    new_response = [pending[:-4]]
                    new_response.extend(self._get_messages(end_message, capabilities))
                    new_response.append(self.FLUSH_PACKET)
                    pending = b''.join(new_response)

                yield pending

        return injected_response()

    def _write_sideband_to_proto(self, proto, data, capabilities):
        """
        Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS

        We do not use dulwich's write_sideband directly as it only supports
        side-band-64k.

        Nothing is written when ``data`` is empty or when the client announced
        none of the side-band capabilities.
        """
        if not data:
            return

        # N.B.(skreft): The values below are explained in the pack protocol
        # documentation, section Packfile Data.
        # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            chunk_size = 65515
        elif CAPABILITY_SIDE_BAND in capabilities:
            chunk_size = 995
        else:
            return

        chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))

        for chunk in chunker:
            proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
                                 ascii_bytes(chunk, allow_bytes=True))

    def _get_messages(self, data, capabilities):
        """Return a list with packets for sending data in sideband number 2."""
        response = []
        proto = dulwich.protocol.Protocol(None, response.append)

        self._write_sideband_to_proto(proto, data, capabilities)

        return response

    def backend(self, request, environ):
        """
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
        Reads commands and data from HTTP POST's body.
        returns an iterator obj with contents of git command's
        response to stdout
        """
        # TODO(skreft): think how we could detect an HTTPLockedException, as
        # we probably want to have the same mechanism used by mercurial and
        # simplevcs.
        # For that we would need to parse the output of the command looking for
        # some signs of the HTTPLockedError, parse the data and reraise it in
        # pygrack. However, that would interfere with the streaming.
        #
        # Now the output of a blocked push is:
        # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # POST git-receive-pack (1047 bytes)
        # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
        # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # ! [remote rejected] master -> master (pre-receive hook declined)
        # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'

        git_command = self._get_fixedpath(request.path_info)
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        capabilities = None
        if git_command == 'git-upload-pack':
            capabilities = self._get_want_capabilities(request)

        if 'CONTENT_LENGTH' in environ:
            inputstream = FileWrapper(request.body_file_seekable,
                                      request.content_length)
        else:
            inputstream = request.body_file_seekable

        resp = Response()
        resp.content_type = f'application/x-{git_command}-result'
        resp.charset = None

        pre_pull_messages = ''
        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_pre_pull(self.extras)
            if hook_response.status != 0:
                pre_pull_messages = hook_response.output
                resp.app_iter = self._build_failed_pre_pull_response(
                    capabilities, pre_pull_messages)
                return resp

        gitenv = dict(os.environ)
        # forget all configs
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
        cmd = [self.git_path, git_command[4:], '--stateless-rpc',
               self.content_path]
        log.debug('handling cmd %s', cmd)

        out = subprocessio.SubprocessIOChunker(
            cmd,
            input_stream=inputstream,
            env=gitenv,
            cwd=self.content_path,
            shell=False,
            fail_on_stderr=False,
            fail_on_return_code=False
        )

        if self.update_server_info and git_command == 'git-receive-pack':
            # We need to fully consume the iterator here, as the
            # update-server-info command needs to be run after the push.
            out = list(out)

            # Updating refs manually after each push.
            # This is required as some clients are exposing Git repos internally
            # with the dumb protocol.
            cmd = [self.git_path, 'update-server-info']
            log.debug('handling cmd %s', cmd)
            output = subprocessio.SubprocessIOChunker(
                cmd,
                input_stream=inputstream,
                env=gitenv,
                cwd=self.content_path,
                shell=False,
                fail_on_stderr=False,
                fail_on_return_code=False
            )
            # Consume all the output so the subprocess finishes
            for _ in output:
                pass

        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_post_pull(self.extras)
            post_pull_messages = hook_response.output
            resp.app_iter = self._build_post_pull_response(
                out, capabilities, pre_pull_messages, post_pull_messages)
        else:
            resp.app_iter = out

        return resp

    def __call__(self, environ, start_response):
        """
        WSGI entry point: dispatch to ``inforefs`` or ``backend``.

        HTTP exceptions raised by the handlers are returned as the response;
        any other exception is logged and turned into a 500 response.
        """
        request = Request(environ)
        _path = self._get_fixedpath(request.path_info)
        if _path.startswith('info/refs'):
            app = self.inforefs
        else:
            app = self.backend

        try:
            resp = app(request, environ)
        except exc.HTTPException as error:
            log.exception('HTTP Error')
            resp = error
        except Exception:
            log.exception('Unknown error')
            resp = exc.HTTPInternalServerError()

        return resp(environ, start_response)