##// END OF EJS Templates
deps: bumped pygit and dulwich
deps: bumped pygit and dulwich

File last commit:

r1153:bc27ecc0 default
r1162:17e9e494 default
Show More
pygrack.py
417 lines | 15.3 KiB | text/x-python | PythonLexer
# RhodeCode VCSServer provides access to different vcs backends via network.
# Copyright (C) 2014-2023 RhodeCode GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Handles the Git smart protocol."""
import os
import socket
import logging
import dulwich.protocol
from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
from webob import Request, Response, exc
from vcsserver.lib.rc_json import json
from vcsserver import hooks, subprocessio
from vcsserver.str_utils import ascii_bytes
log = logging.getLogger(__name__)
class FileWrapper:
"""File wrapper that ensures how much data is read from it."""
def __init__(self, fd, content_length):
self.fd = fd
self.content_length = content_length
self.remain = content_length
def read(self, size):
if size <= self.remain:
try:
data = self.fd.read(size)
except socket.error:
raise IOError(self)
self.remain -= size
elif self.remain:
data = self.fd.read(self.remain)
self.remain = 0
else:
data = None
return data
def __repr__(self):
return '<FileWrapper {} len: {}, read: {}>'.format(
self.fd, self.content_length, self.content_length - self.remain
)
class GitRepository:
"""WSGI app for handling Git smart protocol endpoints."""
git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
commands = frozenset(('git-upload-pack', 'git-receive-pack'))
valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
# The last bytes are the SHA1 of the first 12 bytes.
EMPTY_PACK = (
b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
)
FLUSH_PACKET = b"0000"
SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
files = frozenset(f.lower() for f in os.listdir(content_path))
valid_dir_signature = self.git_folder_signature.issubset(files)
if not valid_dir_signature:
raise OSError(f'{content_path} missing git signature')
self.content_path = content_path
self.repo_name = repo_name
self.extras = extras
self.git_path = git_path
self.update_server_info = update_server_info
def _get_fixedpath(self, path):
"""
Small fix for repo_path
:param path:
"""
path = path.split(self.repo_name, 1)[-1]
if path.startswith('.git'):
# for bare repos we still get the .git prefix inside, we skip it
# here, and remove from the service command
path = path[4:]
return path.strip('/')
def inforefs(self, request, unused_environ):
"""
WSGI Response producer for HTTP GET Git Smart
HTTP /info/refs request.
"""
git_command = request.GET.get('service')
if git_command not in self.commands:
log.debug('command %s not allowed', git_command)
return exc.HTTPForbidden()
# please, resist the urge to add '\n' to git capture and increment
# line count by 1.
# by git docs: Documentation/technical/http-protocol.txt#L214 \n is
# a part of protocol.
# The code in Git client not only does NOT need '\n', but actually
# blows up if you sprinkle "flush" (0000) as "0001\n".
# It reads binary, per number of bytes specified.
# if you do add '\n' as part of data, count it.
server_advert = f'# service={git_command}\n'
packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
try:
gitenv = dict(os.environ)
# forget all configs
gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
command = [self.git_path, git_command[4:], '--stateless-rpc',
'--advertise-refs', self.content_path]
out = subprocessio.SubprocessIOChunker(
command,
env=gitenv,
starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
shell=False
)
except OSError:
log.exception('Error processing command')
raise exc.HTTPExpectationFailed()
resp = Response()
resp.content_type = f'application/x-{git_command}-advertisement'
resp.charset = None
resp.app_iter = out
return resp
def _get_want_capabilities(self, request):
"""Read the capabilities found in the first want line of the request."""
pos = request.body_file_seekable.tell()
first_line = request.body_file_seekable.readline()
request.body_file_seekable.seek(pos)
return frozenset(
dulwich.protocol.extract_want_line_capabilities(first_line)[1])
def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
"""
Construct a response with an empty PACK file.
We use an empty PACK file, as that would trigger the failure of the pull
or clone command.
We also print in the error output a message explaining why the command
was aborted.
If additionally, the user is accepting messages we send them the output
of the pre-pull hook.
Note that for clients not supporting side-band we just send them the
emtpy PACK file.
"""
if self.SIDE_BAND_CAPS.intersection(capabilities):
response = []
proto = dulwich.protocol.Protocol(None, response.append)
proto.write_pkt_line(dulwich.protocol.NAK_LINE)
self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
# N.B.(skreft): Do not change the sideband channel to 3, as that
# produces a fatal error in the client:
# fatal: error in sideband demultiplexer
proto.write_sideband(
dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
proto.write_sideband(
dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
# writes b"0000" as default
proto.write_pkt_line(None)
return response
else:
return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
def _build_post_pull_response(self, response, capabilities, start_message, end_message):
"""
Given a list response we inject the post-pull messages.
We only inject the messages if the client supports sideband, and the
response has the format:
0008NAK\n...0000
Note that we do not check the no-progress capability as by default, git
sends it, which effectively would block all messages.
"""
if not self.SIDE_BAND_CAPS.intersection(capabilities):
return response
if not start_message and not end_message:
return response
try:
iter(response)
# iterator probably will work, we continue
except TypeError:
raise TypeError(f'response must be an iterator: got {type(response)}')
if isinstance(response, (list, tuple)):
raise TypeError(f'response must be an iterator: got {type(response)}')
def injected_response():
do_loop = 1
header_injected = 0
next_item = None
has_item = False
item = b''
while do_loop:
try:
next_item = next(response)
except StopIteration:
do_loop = 0
if has_item:
# last item ! alter it now
if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
new_response = [item[:-4]]
new_response.extend(self._get_messages(end_message, capabilities))
new_response.append(self.FLUSH_PACKET)
item = b''.join(new_response)
yield item
has_item = True
item = next_item
# alter item if it's the initial chunk
if not header_injected and item.startswith(b'0008NAK\n'):
new_response = [b'0008NAK\n']
new_response.extend(self._get_messages(start_message, capabilities))
new_response.append(item[8:])
item = b''.join(new_response)
header_injected = 1
return injected_response()
def _write_sideband_to_proto(self, proto, data, capabilities):
"""
Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
We do not use dulwich's write_sideband directly as it only supports
side-band-64k.
"""
if not data:
return
# N.B.(skreft): The values below are explained in the pack protocol
# documentation, section Packfile Data.
# https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
if CAPABILITY_SIDE_BAND_64K in capabilities:
chunk_size = 65515
elif CAPABILITY_SIDE_BAND in capabilities:
chunk_size = 995
else:
return
chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
for chunk in chunker:
proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
def _get_messages(self, data, capabilities):
"""Return a list with packets for sending data in sideband number 2."""
response = []
proto = dulwich.protocol.Protocol(None, response.append)
self._write_sideband_to_proto(proto, data, capabilities)
return response
def backend(self, request, environ):
"""
WSGI Response producer for HTTP POST Git Smart HTTP requests.
Reads commands and data from HTTP POST's body.
returns an iterator obj with contents of git command's
response to stdout
"""
# TODO(skreft): think how we could detect an HTTPLockedException, as
# we probably want to have the same mechanism used by mercurial and
# simplevcs.
# For that we would need to parse the output of the command looking for
# some signs of the HTTPLockedError, parse the data and reraise it in
# pygrack. However, that would interfere with the streaming.
#
# Now the output of a blocked push is:
# Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
# POST git-receive-pack (1047 bytes)
# remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
# To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
# ! [remote rejected] master -> master (pre-receive hook declined)
# error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
git_command = self._get_fixedpath(request.path_info)
if git_command not in self.commands:
log.debug('command %s not allowed', git_command)
return exc.HTTPForbidden()
capabilities = None
if git_command == 'git-upload-pack':
capabilities = self._get_want_capabilities(request)
if 'CONTENT_LENGTH' in environ:
inputstream = FileWrapper(request.body_file_seekable,
request.content_length)
else:
inputstream = request.body_file_seekable
resp = Response()
resp.content_type = f'application/x-{git_command}-result'
resp.charset = None
pre_pull_messages = ''
# Upload-pack == clone
if git_command == 'git-upload-pack':
hook_response = hooks.git_pre_pull(self.extras)
if hook_response.status != 0:
pre_pull_messages = hook_response.output
resp.app_iter = self._build_failed_pre_pull_response(
capabilities, pre_pull_messages)
return resp
gitenv = dict(os.environ)
# forget all configs
gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
cmd = [self.git_path, git_command[4:], '--stateless-rpc',
self.content_path]
log.debug('handling cmd %s', cmd)
out = subprocessio.SubprocessIOChunker(
cmd,
input_stream=inputstream,
env=gitenv,
cwd=self.content_path,
shell=False,
fail_on_stderr=False,
fail_on_return_code=False
)
if self.update_server_info and git_command == 'git-receive-pack':
# We need to fully consume the iterator here, as the
# update-server-info command needs to be run after the push.
out = list(out)
# Updating refs manually after each push.
# This is required as some clients are exposing Git repos internally
# with the dumb protocol.
cmd = [self.git_path, 'update-server-info']
log.debug('handling cmd %s', cmd)
output = subprocessio.SubprocessIOChunker(
cmd,
input_stream=inputstream,
env=gitenv,
cwd=self.content_path,
shell=False,
fail_on_stderr=False,
fail_on_return_code=False
)
# Consume all the output so the subprocess finishes
for _ in output:
pass
# Upload-pack == clone
if git_command == 'git-upload-pack':
hook_response = hooks.git_post_pull(self.extras)
post_pull_messages = hook_response.output
resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
else:
resp.app_iter = out
return resp
def __call__(self, environ, start_response):
request = Request(environ)
_path = self._get_fixedpath(request.path_info)
if _path.startswith('info/refs'):
app = self.inforefs
else:
app = self.backend
try:
resp = app(request, environ)
except exc.HTTPException as error:
log.exception('HTTP Error')
resp = error
except Exception:
log.exception('Unknown error')
resp = exc.HTTPInternalServerError()
return resp(environ, start_response)