##// END OF EJS Templates
fix(git ops): moved git operations into vcsserver and use libgit2 when possible....
fix(git ops): moved git operations into vcsserver and use libgit2 when possible. This fixes issues with calling merge-base on repos that are not fully in sync

File last commit:

r1327:278da2b3 default
r1337:1fc1a507 default
Show More
pygrack.py
417 lines | 15.3 KiB | text/x-python | PythonLexer
initial commit
r0 # RhodeCode VCSServer provides access to different vcs backends via network.
core: updated copyrights to 2024
r1327 # Copyright (C) 2014-2024 RhodeCode GmbH
initial commit
r0 #
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Handles the Git smart protocol."""
import os
import socket
import logging
import dulwich.protocol
python3: code change for py3 support...
r1048 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
initial commit
r0 from webob import Request, Response, exc
chore(refactor): renamed rc_json to ext_json for ce compat
r1243 from vcsserver.lib.ext_json import json
initial commit
r0 from vcsserver import hooks, subprocessio
core: moved str_utils and type_utils to lib so it's consistent with ce, and easier to sync up codebases
r1249 from vcsserver.lib.str_utils import ascii_bytes
initial commit
r0
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
lint: auto-fixes
class FileWrapper:
    """File wrapper that caps how much data can be read from a stream.

    At most ``content_length`` bytes are ever requested from ``fd``;
    ``remain`` tracks how many of those bytes are still to be delivered.
    """

    def __init__(self, fd, content_length):
        """
        :param fd: object exposing ``read(size)``
        :param content_length: total number of bytes allowed to be read
        """
        self.fd = fd
        self.content_length = content_length
        self.remain = content_length

    def read(self, size):
        """
        Read up to ``size`` bytes without going past ``content_length``.

        :param size: maximum number of bytes to return
        :return: the data read from ``fd``, or ``None`` once the allowed
            amount of data has been consumed
        :raises IOError: when the underlying read fails with a socket error
        """
        if size <= self.remain:
            wanted = size
        elif self.remain:
            # never ask the stream for more than what is left of the body
            wanted = self.remain
        else:
            return None

        try:
            data = self.fd.read(wanted)
        except socket.error as err:
            raise IOError(self) from err

        if data:
            # Account for what was actually returned: the stream may hand
            # back fewer bytes than requested, and subtracting the requested
            # size would silently drop the tail of the body.
            self.remain -= len(data)
        elif wanted:
            # The stream ended before content_length was reached; mark the
            # wrapper as exhausted so following reads return None.
            self.remain = 0
        return data

    def __repr__(self):
        return '<FileWrapper {} len: {}, read: {}>'.format(
            self.fd, self.content_length, self.content_length - self.remain
        )
lint: auto-fixes
class GitRepository:
    """WSGI app for handling Git smart protocol endpoints."""

    # Entries (lower-cased) that must exist in a directory for it to be
    # accepted as a git repository by __init__.
    git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
    # Only these service commands are accepted by inforefs() and backend().
    commands = frozenset(('git-upload-pack', 'git-receive-pack'))
    valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)

    # The last bytes are the SHA1 of the first 12 bytes.
    EMPTY_PACK = (
        b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
        b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
    )
    FLUSH_PACKET = b"0000"

    SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))

    def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
        """
        :param repo_name: repository name, used to strip the request path
        :param content_path: filesystem path of the git repository
        :param git_path: git executable used to run the service commands
        :param update_server_info: when truthy, `update-server-info` is run
            after each `git-receive-pack`
        :param extras: data serialized as JSON into the `RC_SCM_DATA`
            environment variable and handed to the pull hooks
        :raises OSError: when `content_path` does not look like a git repo
        """
        files = frozenset(f.lower() for f in os.listdir(content_path))
        valid_dir_signature = self.git_folder_signature.issubset(files)

        if not valid_dir_signature:
            raise OSError(f'{content_path} missing git signature')

        self.content_path = content_path
        self.repo_name = repo_name
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def _get_fixedpath(self, path):
        """
        Small fix for repo_path

        Returns the part of `path` that follows the repository name, without
        a leading `.git` and without surrounding slashes.

        :param path: request path info
        """
        path = path.split(self.repo_name, 1)[-1]
        if path.startswith('.git'):
            # for bare repos we still get the .git prefix inside, we skip it
            # here, and remove from the service command
            path = path[4:]

        return path.strip('/')

    def inforefs(self, request, unused_environ):
        """
        WSGI Response producer for HTTP GET Git Smart
        HTTP /info/refs request.

        :param request: webob request; the `service` GET parameter selects
            the git command
        :param unused_environ: WSGI environ, not used here
        :return: a Response streaming the ref advertisement, or
            HTTPForbidden when the requested service is not allowed
        :raises exc.HTTPExpectationFailed: when starting the git command
            fails with an OSError
        """
        git_command = request.GET.get('service')
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # please, resist the urge to add '\n' to git capture and increment
        # line count by 1.
        # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
        # a part of protocol.
        # The code in Git client not only does NOT need '\n', but actually
        # blows up if you sprinkle "flush" (0000) as "0001\n".
        # It reads binary, per number of bytes specified.
        # if you do add '\n' as part of data, count it.
        server_advert = f'# service={git_command}\n'
        # pkt-line length prefix: 4 lower-case hex digits, counting the
        # prefix itself
        packet_len = f'{len(server_advert) + 4:04x}'
        try:
            gitenv = dict(os.environ)
            # forget all configs
            gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
            # git_command[4:] drops the leading 'git-'
            command = [self.git_path, git_command[4:], '--stateless-rpc',
                       '--advertise-refs', self.content_path]
            out = subprocessio.SubprocessIOChunker(
                command,
                env=gitenv,
                starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
                shell=False
            )
        except OSError:
            log.exception('Error processing command')
            raise exc.HTTPExpectationFailed()

        resp = Response()
        resp.content_type = f'application/x-{git_command}-advertisement'
        resp.charset = None
        resp.app_iter = out

        return resp

    def _get_want_capabilities(self, request):
        """Read the capabilities found in the first want line of the request."""
        # remember the position so the body can be read again afterwards
        pos = request.body_file_seekable.tell()
        first_line = request.body_file_seekable.readline()
        request.body_file_seekable.seek(pos)

        return frozenset(
            dulwich.protocol.extract_want_line_capabilities(first_line)[1])

    def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
        """
        Construct a response with an empty PACK file.

        We use an empty PACK file, as that would trigger the failure of the pull
        or clone command.

        We also print in the error output a message explaining why the command
        was aborted.

        If additionally, the user is accepting messages we send them the output
        of the pre-pull hook.

        Note that for clients not supporting side-band we just send them the
        empty PACK file.

        :param capabilities: capabilities announced by the client
        :param pre_pull_messages: output of the failed pre-pull hook
        :return: list of bytes chunks forming the response body
        """

        if self.SIDE_BAND_CAPS.intersection(capabilities):
            response = []
            proto = dulwich.protocol.Protocol(None, response.append)
            proto.write_pkt_line(dulwich.protocol.NAK_LINE)

            self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
            # N.B.(skreft): Do not change the sideband channel to 3, as that
            # produces a fatal error in the client:
            #  fatal: error in sideband demultiplexer
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
                ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
            proto.write_sideband(
                dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
                ascii_bytes(self.EMPTY_PACK, allow_bytes=True))

            # writes b"0000" as default
            proto.write_pkt_line(None)

            return response
        else:
            return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]

    def _build_post_pull_response(self, response, capabilities, start_message, end_message):
        """
        Given an iterator response we inject the post-pull messages.

        We only inject the messages if the client supports sideband, and the
        response has the format:
            0008NAK\\n...0000

        Note that we do not check the no-progress capability as by default, git
        sends it, which effectively would block all messages.

        :param response: iterator over bytes chunks of the git output
        :param capabilities: capabilities announced by the client
        :param start_message: message injected right after the NAK line
        :param end_message: message injected before the final flush packet
        :return: `response` itself when nothing is to be injected, otherwise
            a generator yielding the altered chunks
        :raises TypeError: when `response` is not iterable, or is a list or
            tuple
        """

        if not self.SIDE_BAND_CAPS.intersection(capabilities):
            return response

        if not start_message and not end_message:
            return response

        try:
            iter(response)
            # iterator probably will work, we continue
        except TypeError:
            raise TypeError(f'response must be an iterator: got {type(response)}')
        if isinstance(response, (list, tuple)):
            raise TypeError(f'response must be an iterator: got {type(response)}')

        def injected_response():
            # Chunks are emitted one step late: a chunk is held back until we
            # know whether another one follows, because only the very last
            # chunk may get the end message injected.
            header_injected = False
            has_item = False
            item = b''

            for next_item in response:
                if has_item:
                    yield item

                # alter the chunk if it carries the initial NAK line
                if not header_injected and next_item.startswith(b'0008NAK\n'):
                    new_response = [b'0008NAK\n']
                    new_response.extend(self._get_messages(start_message, capabilities))
                    new_response.append(next_item[8:])
                    next_item = b''.join(new_response)
                    header_injected = True

                has_item = True
                item = next_item

            # An empty response yields nothing at all; without this guard the
            # held-back slot would be used although no chunk was ever read.
            if has_item:
                # last item ! alter it now
                if item.endswith(self.FLUSH_PACKET):
                    new_response = [item[:-4]]
                    new_response.extend(self._get_messages(end_message, capabilities))
                    new_response.append(self.FLUSH_PACKET)
                    item = b''.join(new_response)
                yield item

        return injected_response()

    def _write_sideband_to_proto(self, proto, data, capabilities):
        """
        Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS

        We do not use dulwich's write_sideband directly as it only supports
        side-band-64k.

        Nothing is written when `data` is empty or when the client announced
        neither side-band capability.
        """
        if not data:
            return

        # N.B.(skreft): The values below are explained in the pack protocol
        # documentation, section Packfile Data.
        # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            chunk_size = 65515
        elif CAPABILITY_SIDE_BAND in capabilities:
            chunk_size = 995
        else:
            return

        chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))

        for chunk in chunker:
            proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))

    def _get_messages(self, data, capabilities):
        """Return a list with packets for sending data in sideband number 2."""
        response = []
        proto = dulwich.protocol.Protocol(None, response.append)

        self._write_sideband_to_proto(proto, data, capabilities)

        return response

    def backend(self, request, environ):
        """
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
        Reads commands and data from HTTP POST's body.
        returns an iterator obj with contents of git command's
        response to stdout

        :param request: webob request; the git command is taken from its
            path info
        :param environ: WSGI environ
        :return: a Response streaming the command output, or HTTPForbidden
            when the command is not allowed
        """
        # TODO(skreft): think how we could detect an HTTPLockedException, as
        # we probably want to have the same mechanism used by mercurial and
        # simplevcs.
        # For that we would need to parse the output of the command looking for
        # some signs of the HTTPLockedError, parse the data and reraise it in
        # pygrack. However, that would interfere with the streaming.
        #
        # Now the output of a blocked push is:
        # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # POST git-receive-pack (1047 bytes)
        # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
        # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # ! [remote rejected] master -> master (pre-receive hook declined)
        # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'

        git_command = self._get_fixedpath(request.path_info)
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        capabilities = None
        if git_command == 'git-upload-pack':
            capabilities = self._get_want_capabilities(request)

        # FileWrapper compares every read size against the content length, so
        # it can only be used when an actual length is known.
        # NOTE(review): webob presumably reports an empty CONTENT_LENGTH
        # header as None -- confirm against the webob documentation.
        content_length = request.content_length if 'CONTENT_LENGTH' in environ else None
        if content_length is not None:
            inputstream = FileWrapper(request.body_file_seekable, content_length)
        else:
            inputstream = request.body_file_seekable

        resp = Response()
        resp.content_type = f'application/x-{git_command}-result'
        resp.charset = None

        pre_pull_messages = ''
        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_pre_pull(self.extras)
            if hook_response.status != 0:
                pre_pull_messages = hook_response.output
                resp.app_iter = self._build_failed_pre_pull_response(
                    capabilities, pre_pull_messages)
                return resp

        gitenv = dict(os.environ)
        # forget all configs
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
        # git_command[4:] drops the leading 'git-'
        cmd = [self.git_path, git_command[4:], '--stateless-rpc',
               self.content_path]
        log.debug('handling cmd %s', cmd)

        out = subprocessio.SubprocessIOChunker(
            cmd,
            input_stream=inputstream,
            env=gitenv,
            cwd=self.content_path,
            shell=False,
            fail_on_stderr=False,
            fail_on_return_code=False
        )

        if self.update_server_info and git_command == 'git-receive-pack':
            # We need to fully consume the iterator here, as the
            # update-server-info command needs to be run after the push.
            out = list(out)

            # Updating refs manually after each push.
            # This is required as some clients are exposing Git repos internally
            # with the dumb protocol.
            cmd = [self.git_path, 'update-server-info']
            log.debug('handling cmd %s', cmd)
            output = subprocessio.SubprocessIOChunker(
                cmd,
                input_stream=inputstream,
                env=gitenv,
                cwd=self.content_path,
                shell=False,
                fail_on_stderr=False,
                fail_on_return_code=False
            )
            # Consume all the output so the subprocess finishes
            for _ in output:
                pass

        # Upload-pack == clone
        if git_command == 'git-upload-pack':
            hook_response = hooks.git_post_pull(self.extras)
            post_pull_messages = hook_response.output
            resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
        else:
            resp.app_iter = out

        return resp

    def __call__(self, environ, start_response):
        """
        WSGI entry point: dispatch to inforefs() or backend().

        HTTP exceptions raised by the handlers are used as the response;
        any other exception is logged and answered with
        HTTPInternalServerError.
        """
        request = Request(environ)
        _path = self._get_fixedpath(request.path_info)
        if _path.startswith('info/refs'):
            app = self.inforefs
        else:
            app = self.backend

        try:
            resp = app(request, environ)
        except exc.HTTPException as error:
            log.exception('HTTP Error')
            resp = error
        except Exception:
            log.exception('Unknown error')
            resp = exc.HTTPInternalServerError()

        return resp(environ, start_response)