pygrack.py
386 lines
| 14.0 KiB
| text/x-python
|
PythonLexer
/ vcsserver / pygrack.py
r0 | # RhodeCode VCSServer provides access to different vcs backends via network. | |||
r850 | # Copyright (C) 2014-2020 RhodeCode GmbH | |||
r0 | # | |||
# This program is free software; you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation; either version 3 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
# along with this program; if not, write to the Free Software Foundation, | ||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
"""Handles the Git smart protocol.""" | ||||
import os | ||||
import socket | ||||
import logging | ||||
import simplejson as json | ||||
import dulwich.protocol | ||||
from webob import Request, Response, exc | ||||
from vcsserver import hooks, subprocessio | ||||
log = logging.getLogger(__name__) | ||||
class FileWrapper(object):
    """
    File wrapper that caps how much data may be read from a file object.

    The wrapper hands out at most ``content_length`` bytes of the wrapped
    ``fd`` in total, no matter how much the callers ask for.
    """

    def __init__(self, fd, content_length):
        """
        :param fd: file-like object exposing ``read(size)``
        :param content_length: total number of bytes that may be read
        """
        self.fd = fd
        self.content_length = content_length
        # number of bytes that may still be handed out
        self.remain = content_length

    def read(self, size=-1):
        """
        Read up to ``size`` bytes, never exceeding the remaining budget.

        :param size: maximum number of bytes to return; a negative value or
            ``None`` means "everything that is still allowed"
        :return: the data read, or ``None`` once the budget is exhausted
        :raises IOError: when the underlying read fails with a socket error
        """
        if not self.remain:
            return None

        # Clamp the request to the budget. A negative size must not be
        # forwarded as-is: it would make the wrapped file read until EOF and
        # bypass the content length limit.
        if size is None or size < 0 or size > self.remain:
            size = self.remain

        try:
            data = self.fd.read(size)
        except socket.error:
            raise IOError(self)

        # Account for what was really delivered; the wrapped file is allowed
        # to return less than requested.
        if data:
            self.remain -= len(data)
        return data

    def __repr__(self):
        return '<FileWrapper %s len: %s, read: %s>' % (
            self.fd, self.content_length, self.content_length - self.remain
        )
class GitRepository(object):
    """WSGI app for handling Git smart protocol endpoints."""

    # Entries (lower-cased) that must exist in a directory for it to be
    # accepted as a git repository.
    git_folder_signature = frozenset(
        ('config', 'head', 'info', 'objects', 'refs'))
    # The only git services that may be requested through this app.
    commands = frozenset(('git-upload-pack', 'git-receive-pack'))
    valid_accepts = frozenset(('application/x-%s-result' %
                               c for c in commands))

    # The last bytes are the SHA1 of the first 12 bytes.
    EMPTY_PACK = (
        'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
        '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
    )
    SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))

    def __init__(self, repo_name, content_path, git_path, update_server_info,
                 extras):
        """
        :param repo_name: name of the repository as it appears in the
            request path; used to cut the path down to the git command
        :param content_path: filesystem path of the git directory
        :param git_path: git executable used to serve the requests
        :param update_server_info: when true, ``git update-server-info`` is
            run after every ``git-receive-pack``
        :param extras: JSON-serializable data exported to git through the
            ``RC_SCM_DATA`` environment variable and passed to the pull hooks
        :raises OSError: when ``content_path`` cannot be listed or does not
            contain the entries from ``git_folder_signature``
        """
        files = frozenset(f.lower() for f in os.listdir(content_path))
        valid_dir_signature = self.git_folder_signature.issubset(files)

        if not valid_dir_signature:
            raise OSError('%s missing git signature' % content_path)

        self.content_path = content_path
        self.repo_name = repo_name
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def _get_fixedpath(self, path):
        """
        Small fix for repo_path

        Returns the part of ``path`` that follows the first occurrence of
        the repository name, without a leading ``.git`` and without
        surrounding slashes.

        :param path: request path info
        """
        path = path.split(self.repo_name, 1)[-1]
        if path.startswith('.git'):
            # for bare repos we still get the .git prefix inside, we skip it
            # here, and remove from the service command
            path = path[4:]

        return path.strip('/')

    def inforefs(self, request, unused_environ):
        """
        WSGI Response producer for HTTP GET Git Smart
        HTTP /info/refs request.

        :return: a response streaming the refs advertisement, or
            ``HTTPForbidden`` when the requested service is not allowed
        :raises HTTPExpectationFailed: when the git process cannot be
            started
        """
        git_command = request.GET.get('service')
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # please, resist the urge to add '\n' to git capture and increment
        # line count by 1.
        # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
        # a part of protocol.
        # The code in Git client not only does NOT need '\n', but actually
        # blows up if you sprinkle "flush" (0000) as "0001\n".
        # It reads binary, per number of bytes specified.
        # if you do add '\n' as part of data, count it.
        server_advert = '# service=%s\n' % git_command
        # pkt-line length prefix: 4 lower-case hex digits counting the
        # payload plus the 4 bytes of the prefix itself
        packet_len = '%04x' % (len(server_advert) + 4)
        try:
            gitenv = dict(os.environ)
            # NOTE(review): unlike backend(), GIT_CONFIG_NOGLOBAL is not set
            # here -- confirm this is intended.
            gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
            # git_command[4:] drops the 'git-' prefix, e.g. 'upload-pack'
            command = [self.git_path, git_command[4:], '--stateless-rpc',
                       '--advertise-refs', self.content_path]
            out = subprocessio.SubprocessIOChunker(
                command,
                env=gitenv,
                starting_values=[packet_len + server_advert + '0000'],
                shell=False
            )
        except EnvironmentError:
            log.exception('Error processing command')
            raise exc.HTTPExpectationFailed()

        resp = Response()
        resp.content_type = 'application/x-%s-advertisement' % str(git_command)
        resp.charset = None
        resp.app_iter = out

        return resp

    def _get_want_capabilities(self, request):
        """Read the capabilities found in the first want line of the request."""
        # Peek at the first line only; the body position is restored so the
        # git process still receives the complete request.
        pos = request.body_file_seekable.tell()
        first_line = request.body_file_seekable.readline()
        request.body_file_seekable.seek(pos)

        return frozenset(
            dulwich.protocol.extract_want_line_capabilities(first_line)[1])

    def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
        """
        Construct a response with an empty PACK file.

        We use an empty PACK file, as that would trigger the failure of the pull
        or clone command.

        We also print in the error output a message explaining why the command
        was aborted.

        If additionally, the user is accepting messages we send them the output
        of the pre-pull hook.

        Note that for clients not supporting side-band we just send them the
        empty PACK file.

        :param capabilities: capabilities requested by the client
        :param pre_pull_messages: output of the pre-pull hook
        :return: list with the chunks of the response body
        """
        if self.SIDE_BAND_CAPS.intersection(capabilities):
            response = []
            proto = dulwich.protocol.Protocol(None, response.append)
            proto.write_pkt_line('NAK\n')
            self._write_sideband_to_proto(pre_pull_messages, proto,
                                          capabilities)
            # N.B.(skreft): Do not change the sideband channel to 3, as that
            # produces a fatal error in the client:
            #   fatal: error in sideband demultiplexer
            proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
            proto.write_sideband(1, self.EMPTY_PACK)

            # writes 0000
            proto.write_pkt_line(None)

            return response
        else:
            return [self.EMPTY_PACK]

    def _write_sideband_to_proto(self, data, proto, capabilities):
        """
        Write the data to the proto's sideband number 2.

        We do not use dulwich's write_sideband directly as it only supports
        side-band-64k.

        Nothing is written when ``data`` is empty or when the client
        requested neither ``side-band-64k`` nor ``side-band``.
        """
        if not data:
            return

        # N.B.(skreft): The values below are explained in the pack protocol
        # documentation, section Packfile Data.
        # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
        if 'side-band-64k' in capabilities:
            chunk_size = 65515
        elif 'side-band' in capabilities:
            chunk_size = 995
        else:
            return

        chunker = (
            data[i:i + chunk_size] for i in range(0, len(data), chunk_size))

        for chunk in chunker:
            proto.write_sideband(2, chunk)

    def _get_messages(self, data, capabilities):
        """Return a list with packets for sending data in sideband number 2."""
        response = []
        proto = dulwich.protocol.Protocol(None, response.append)

        self._write_sideband_to_proto(data, proto, capabilities)

        return response

    def _inject_messages_to_response(self, response, capabilities,
                                     start_messages, end_messages):
        """
        Given a list response we inject the pre/post-pull messages.

        We only inject the messages if the client supports sideband, and the
        response has the format:
            0008NAK\n...0000

        Note that we do not check the no-progress capability as by default, git
        sends it, which effectively would block all messages.

        :param response: iterable with the chunks produced by git
        :param capabilities: capabilities requested by the client
        :param start_messages: text to send before the git output
        :param end_messages: text to send after the git output
        :return: the response untouched when nothing can be injected,
            otherwise a new list of chunks including the messages
        """
        if not self.SIDE_BAND_CAPS.intersection(capabilities):
            return response

        if not start_messages and not end_messages:
            return response

        # make a list out of response if it's an iterator
        # so we can investigate it for message injection.
        if hasattr(response, '__iter__'):
            response = list(response)

        # git may have produced no output at all (its exit code is not
        # checked by the caller); there is nothing to inject into then and
        # indexing the chunks below would fail.
        if not response:
            return response

        if (not response[0].startswith('0008NAK\n') or
                not response[-1].endswith('0000')):
            return response

        new_response = ['0008NAK\n']
        new_response.extend(self._get_messages(start_messages, capabilities))
        # Re-emit the git output without its leading NAK packet (8 chars)
        # and without the trailing flush packet (4 chars).
        if len(response) == 1:
            new_response.append(response[0][8:-4])
        else:
            new_response.append(response[0][8:])
            new_response.extend(response[1:-1])
            new_response.append(response[-1][:-4])
        new_response.extend(self._get_messages(end_messages, capabilities))
        new_response.append('0000')

        return new_response

    def backend(self, request, environ):
        """
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
        Reads commands and data from HTTP POST's body.
        returns an iterator obj with contents of git command's
        response to stdout

        :return: a response carrying the git output, or ``HTTPForbidden``
            when the requested command is not allowed
        """
        # TODO(skreft): think how we could detect an HTTPLockedException, as
        # we probably want to have the same mechanism used by mercurial and
        # simplevcs.
        # For that we would need to parse the output of the command looking for
        # some signs of the HTTPLockedError, parse the data and reraise it in
        # pygrack. However, that would interfere with the streaming.
        #
        # Now the output of a blocked push is:
        # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # POST git-receive-pack (1047 bytes)
        # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
        # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # ! [remote rejected] master -> master (pre-receive hook declined)
        # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'

        git_command = self._get_fixedpath(request.path_info)
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # Capabilities are only needed for pulls, where hook messages may be
        # sent back to the client over the sideband.
        capabilities = None
        if git_command == 'git-upload-pack':
            capabilities = self._get_want_capabilities(request)

        if 'CONTENT_LENGTH' in environ:
            inputstream = FileWrapper(request.body_file_seekable,
                                      request.content_length)
        else:
            inputstream = request.body_file_seekable

        resp = Response()
        # NOTE(review): on Python 3 formatting the encoded (bytes) command
        # with %s would embed its b'...' repr -- confirm the target
        # interpreter before porting.
        resp.content_type = ('application/x-%s-result' %
                             git_command.encode('utf8'))
        resp.charset = None

        pre_pull_messages = ''
        if git_command == 'git-upload-pack':
            status, pre_pull_messages = hooks.git_pre_pull(self.extras)
            if status != 0:
                resp.app_iter = self._build_failed_pre_pull_response(
                    capabilities, pre_pull_messages)
                return resp

        gitenv = dict(os.environ)
        # forget all configs
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
        # git_command[4:] drops the 'git-' prefix, e.g. 'upload-pack'
        cmd = [self.git_path, git_command[4:], '--stateless-rpc',
               self.content_path]
        log.debug('handling cmd %s', cmd)

        out = subprocessio.SubprocessIOChunker(
            cmd,
            inputstream=inputstream,
            env=gitenv,
            cwd=self.content_path,
            shell=False,
            fail_on_stderr=False,
            fail_on_return_code=False
        )

        if self.update_server_info and git_command == 'git-receive-pack':
            # We need to fully consume the iterator here, as the
            # update-server-info command needs to be run after the push.
            out = list(out)

            # Updating refs manually after each push.
            # This is required as some clients are exposing Git repos internally
            # with the dumb protocol.
            cmd = [self.git_path, 'update-server-info']
            log.debug('handling cmd %s', cmd)
            # NOTE(review): the request body has already been consumed by
            # the receive-pack process above; presumably nothing is left to
            # read from inputstream here -- verify.
            output = subprocessio.SubprocessIOChunker(
                cmd,
                inputstream=inputstream,
                env=gitenv,
                cwd=self.content_path,
                shell=False,
                fail_on_stderr=False,
                fail_on_return_code=False
            )
            # Consume all the output so the subprocess finishes
            for _ in output:
                pass

        if git_command == 'git-upload-pack':
            unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
            resp.app_iter = self._inject_messages_to_response(
                out, capabilities, pre_pull_messages, post_pull_messages)
        else:
            resp.app_iter = out

        return resp

    def __call__(self, environ, start_response):
        """
        WSGI entry point.

        Requests whose fixed path starts with ``info/refs`` are served by
        :meth:`inforefs`, everything else by :meth:`backend`. HTTP
        exceptions raised by the handler are returned as the response, any
        other exception results in an internal server error.
        """
        request = Request(environ)
        _path = self._get_fixedpath(request.path_info)
        if _path.startswith('info/refs'):
            app = self.inforefs
        else:
            app = self.backend

        try:
            resp = app(request, environ)
        except exc.HTTPException as error:
            log.exception('HTTP Error')
            resp = error
        except Exception:
            log.exception('Unknown error')
            resp = exc.HTTPInternalServerError()

        return resp(environ, start_response)