##// END OF EJS Templates
pip2nix: cleanups and NEW package
pip2nix: cleanups and NEW package

File last commit:

r850:cbc05af2 default
r975:310656f4 python3
Show More
pygrack.py
386 lines | 14.0 KiB | text/x-python | PythonLexer
initial commit
r0 # RhodeCode VCSServer provides access to different vcs backends via network.
code: update copyrights to 2020
r850 # Copyright (C) 2014-2020 RhodeCode GmbH
initial commit
r0 #
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Handles the Git smart protocol."""
import os
import socket
import logging
import simplejson as json
import dulwich.protocol
from webob import Request, Response, exc
from vcsserver import hooks, subprocessio
log = logging.getLogger(__name__)
class FileWrapper(object):
"""File wrapper that ensures how much data is read from it."""
def __init__(self, fd, content_length):
self.fd = fd
self.content_length = content_length
self.remain = content_length
def read(self, size):
if size <= self.remain:
try:
data = self.fd.read(size)
except socket.error:
raise IOError(self)
self.remain -= size
elif self.remain:
data = self.fd.read(self.remain)
self.remain = 0
else:
data = None
return data
def __repr__(self):
return '<FileWrapper %s len: %s, read: %s>' % (
self.fd, self.content_length, self.content_length - self.remain
)
class GitRepository(object):
"""WSGI app for handling Git smart protocol endpoints."""
git_folder_signature = frozenset(
('config', 'head', 'info', 'objects', 'refs'))
commands = frozenset(('git-upload-pack', 'git-receive-pack'))
valid_accepts = frozenset(('application/x-%s-result' %
c for c in commands))
# The last bytes are the SHA1 of the first 12 bytes.
EMPTY_PACK = (
'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
'\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
)
SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))
def __init__(self, repo_name, content_path, git_path, update_server_info,
extras):
files = frozenset(f.lower() for f in os.listdir(content_path))
valid_dir_signature = self.git_folder_signature.issubset(files)
if not valid_dir_signature:
raise OSError('%s missing git signature' % content_path)
self.content_path = content_path
self.repo_name = repo_name
self.extras = extras
self.git_path = git_path
self.update_server_info = update_server_info
def _get_fixedpath(self, path):
"""
Small fix for repo_path
:param path:
"""
git: handle properly the .git bare repo clones.
r202 path = path.split(self.repo_name, 1)[-1]
if path.startswith('.git'):
# for bare repos we still get the .git prefix inside, we skip it
# here, and remove from the service command
path = path[4:]
return path.strip('/')
initial commit
r0
def inforefs(self, request, unused_environ):
"""
WSGI Response producer for HTTP GET Git Smart
HTTP /info/refs request.
"""
git_command = request.GET.get('service')
if git_command not in self.commands:
log.debug('command %s not allowed', git_command)
return exc.HTTPForbidden()
# please, resist the urge to add '\n' to git capture and increment
# line count by 1.
# by git docs: Documentation/technical/http-protocol.txt#L214 \n is
# a part of protocol.
# The code in Git client not only does NOT need '\n', but actually
# blows up if you sprinkle "flush" (0000) as "0001\n".
# It reads binary, per number of bytes specified.
# if you do add '\n' as part of data, count it.
server_advert = '# service=%s\n' % git_command
packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
try:
gitenv = dict(os.environ)
# forget all configs
gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
command = [self.git_path, git_command[4:], '--stateless-rpc',
'--advertise-refs', self.content_path]
out = subprocessio.SubprocessIOChunker(
command,
env=gitenv,
starting_values=[packet_len + server_advert + '0000'],
shell=False
)
except EnvironmentError:
log.exception('Error processing command')
raise exc.HTTPExpectationFailed()
resp = Response()
resp.content_type = 'application/x-%s-advertisement' % str(git_command)
resp.charset = None
resp.app_iter = out
return resp
def _get_want_capabilities(self, request):
"""Read the capabilities found in the first want line of the request."""
pos = request.body_file_seekable.tell()
first_line = request.body_file_seekable.readline()
request.body_file_seekable.seek(pos)
return frozenset(
dulwich.protocol.extract_want_line_capabilities(first_line)[1])
def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
"""
Construct a response with an empty PACK file.
We use an empty PACK file, as that would trigger the failure of the pull
or clone command.
We also print in the error output a message explaining why the command
was aborted.
If aditionally, the user is accepting messages we send them the output
of the pre-pull hook.
Note that for clients not supporting side-band we just send them the
emtpy PACK file.
"""
if self.SIDE_BAND_CAPS.intersection(capabilities):
response = []
proto = dulwich.protocol.Protocol(None, response.append)
proto.write_pkt_line('NAK\n')
self._write_sideband_to_proto(pre_pull_messages, proto,
capabilities)
# N.B.(skreft): Do not change the sideband channel to 3, as that
# produces a fatal error in the client:
# fatal: error in sideband demultiplexer
proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
proto.write_sideband(1, self.EMPTY_PACK)
# writes 0000
proto.write_pkt_line(None)
return response
else:
return [self.EMPTY_PACK]
def _write_sideband_to_proto(self, data, proto, capabilities):
"""
Write the data to the proto's sideband number 2.
We do not use dulwich's write_sideband directly as it only supports
side-band-64k.
"""
if not data:
return
# N.B.(skreft): The values below are explained in the pack protocol
# documentation, section Packfile Data.
# https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
if 'side-band-64k' in capabilities:
chunk_size = 65515
elif 'side-band' in capabilities:
chunk_size = 995
else:
return
chunker = (
data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size))
for chunk in chunker:
proto.write_sideband(2, chunk)
def _get_messages(self, data, capabilities):
"""Return a list with packets for sending data in sideband number 2."""
response = []
proto = dulwich.protocol.Protocol(None, response.append)
self._write_sideband_to_proto(data, proto, capabilities)
return response
def _inject_messages_to_response(self, response, capabilities,
start_messages, end_messages):
"""
git: make sure we don't break streaming in case of empty pull messages....
r273 Given a list response we inject the pre/post-pull messages.
initial commit
r0
We only inject the messages if the client supports sideband, and the
response has the format:
0008NAK\n...0000
Note that we do not check the no-progress capability as by default, git
sends it, which effectively would block all messages.
"""
if not self.SIDE_BAND_CAPS.intersection(capabilities):
return response
git: make sure we don't break streaming in case of empty pull messages....
r273 if not start_messages and not end_messages:
return response
# make a list out of response if it's an iterator
# so we can investigate it for message injection.
if hasattr(response, '__iter__'):
response = list(response)
initial commit
r0 if (not response[0].startswith('0008NAK\n') or
not response[-1].endswith('0000')):
return response
new_response = ['0008NAK\n']
new_response.extend(self._get_messages(start_messages, capabilities))
if len(response) == 1:
new_response.append(response[0][8:-4])
else:
new_response.append(response[0][8:])
new_response.extend(response[1:-1])
new_response.append(response[-1][:-4])
new_response.extend(self._get_messages(end_messages, capabilities))
new_response.append('0000')
return new_response
def backend(self, request, environ):
"""
WSGI Response producer for HTTP POST Git Smart HTTP requests.
Reads commands and data from HTTP POST's body.
returns an iterator obj with contents of git command's
response to stdout
"""
# TODO(skreft): think how we could detect an HTTPLockedException, as
# we probably want to have the same mechanism used by mercurial and
# simplevcs.
# For that we would need to parse the output of the command looking for
# some signs of the HTTPLockedError, parse the data and reraise it in
# pygrack. However, that would interfere with the streaming.
#
# Now the output of a blocked push is:
# Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
# POST git-receive-pack (1047 bytes)
# remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
# To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
# ! [remote rejected] master -> master (pre-receive hook declined)
# error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
git_command = self._get_fixedpath(request.path_info)
if git_command not in self.commands:
log.debug('command %s not allowed', git_command)
return exc.HTTPForbidden()
capabilities = None
if git_command == 'git-upload-pack':
capabilities = self._get_want_capabilities(request)
if 'CONTENT_LENGTH' in environ:
inputstream = FileWrapper(request.body_file_seekable,
request.content_length)
else:
inputstream = request.body_file_seekable
resp = Response()
resp.content_type = ('application/x-%s-result' %
git_command.encode('utf8'))
resp.charset = None
git: make sure we don't break streaming in case of empty pull messages....
r273 pre_pull_messages = ''
initial commit
r0 if git_command == 'git-upload-pack':
status, pre_pull_messages = hooks.git_pre_pull(self.extras)
if status != 0:
resp.app_iter = self._build_failed_pre_pull_response(
capabilities, pre_pull_messages)
return resp
gitenv = dict(os.environ)
# forget all configs
gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
cmd = [self.git_path, git_command[4:], '--stateless-rpc',
self.content_path]
log.debug('handling cmd %s', cmd)
out = subprocessio.SubprocessIOChunker(
cmd,
inputstream=inputstream,
env=gitenv,
cwd=self.content_path,
shell=False,
fail_on_stderr=False,
fail_on_return_code=False
)
if self.update_server_info and git_command == 'git-receive-pack':
# We need to fully consume the iterator here, as the
# update-server-info command needs to be run after the push.
out = list(out)
# Updating refs manually after each push.
# This is required as some clients are exposing Git repos internally
# with the dumb protocol.
cmd = [self.git_path, 'update-server-info']
log.debug('handling cmd %s', cmd)
output = subprocessio.SubprocessIOChunker(
cmd,
inputstream=inputstream,
env=gitenv,
cwd=self.content_path,
shell=False,
fail_on_stderr=False,
fail_on_return_code=False
)
# Consume all the output so the subprocess finishes
for _ in output:
pass
if git_command == 'git-upload-pack':
unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
resp.app_iter = self._inject_messages_to_response(
out, capabilities, pre_pull_messages, post_pull_messages)
else:
resp.app_iter = out
return resp
def __call__(self, environ, start_response):
request = Request(environ)
_path = self._get_fixedpath(request.path_info)
if _path.startswith('info/refs'):
app = self.inforefs
else:
app = self.backend
try:
resp = app(request, environ)
except exc.HTTPException as error:
log.exception('HTTP Error')
resp = error
except Exception:
log.exception('Unknown error')
resp = exc.HTTPInternalServerError()
return resp(environ, start_response)