# HG changeset patch # User RhodeCode Admin # Date 2023-03-03 14:18:57 # Node ID 742e21ae4e647e9af63cbafb7b9a5b13cd9ac1b3 # Parent d10fadc4ee1b7bc2b723a7f3d9c02844b89fc12f python3: code change for py3 support - python3.10 compat - switched hooks to binary msgpack protocol - fixed pygrack issues with subprocessio exhaustion causing huge memory use - multiple fixes and changes for python3 support - ALL tests pass diff --git a/vcsserver/base.py b/vcsserver/base.py --- a/vcsserver/base.py +++ b/vcsserver/base.py @@ -66,14 +66,16 @@ def obfuscate_qs(query_string): k, '={}'.format(v) if v else '') for k, v in parsed) -def raise_from_original(new_type): +def raise_from_original(new_type, org_exc: Exception): """ Raise a new exception type with original args and traceback. """ + exc_type, exc_value, exc_traceback = sys.exc_info() new_exc = new_type(*exc_value.args) + # store the original traceback into the new exc - new_exc._org_exc_tb = traceback.format_exc(exc_traceback) + new_exc._org_exc_tb = traceback.format_tb(exc_traceback) try: raise new_exc.with_traceback(exc_traceback) diff --git a/vcsserver/config/settings_maker.py b/vcsserver/config/settings_maker.py --- a/vcsserver/config/settings_maker.py +++ b/vcsserver/config/settings_maker.py @@ -81,8 +81,9 @@ class SettingsMaker(object): @classmethod def _bool_func(cls, input_val): - if isinstance(input_val, unicode): - input_val = input_val.encode('utf8') + if isinstance(input_val, bytes): + # decode to str + input_val = input_val.decode('utf8') return str2bool(input_val) @classmethod diff --git a/vcsserver/echo_stub/echo_app.py b/vcsserver/echo_stub/echo_app.py --- a/vcsserver/echo_stub/echo_app.py +++ b/vcsserver/echo_stub/echo_app.py @@ -23,7 +23,7 @@ class EchoApp(object): status = '200 OK' headers = [('Content-Type', 'text/plain')] start_response(status, headers) - return ["ECHO"] + return [b"ECHO"] class EchoAppStream(object): @@ -42,7 +42,7 @@ class EchoAppStream(object): def generator(): for _ in range(1000000): - yield "ECHO" + yield b"ECHO_STREAM" return generator() diff --git a/vcsserver/echo_stub/remote_wsgi.py b/vcsserver/echo_stub/remote_wsgi.py --- a/vcsserver/echo_stub/remote_wsgi.py +++ b/vcsserver/echo_stub/remote_wsgi.py @@ -42,4 +42,4 @@ def _assert_valid_config(config): config = config.copy() # This is what git needs from config at this stage - config.pop('git_update_server_info') + config.pop(b'git_update_server_info') diff --git a/vcsserver/git_lfs/app.py b/vcsserver/git_lfs/app.py --- a/vcsserver/git_lfs/app.py +++ b/vcsserver/git_lfs/app.py @@ -19,13 +19,13 @@ import re import logging from wsgiref.util import FileWrapper -import simplejson as json from pyramid.config import Configurator from pyramid.response import Response, FileIter from pyramid.httpexceptions import ( HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden, HTTPUnprocessableEntity) +from vcsserver.lib.rc_json import json from vcsserver.git_lfs.lib import OidHandler, LFSOidStore from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator from vcsserver.utils import safe_int @@ -42,7 +42,7 @@ def write_response_error(http_exception, _exception = http_exception(content_type=content_type) _exception.content_type = content_type if text: - _exception.body = json.dumps({'message': text}) + _exception.text = json.dumps({'message': text}) log.debug('LFS: writing response of type %s to client with text:%s', http_exception, text) return _exception diff --git a/vcsserver/git_lfs/tests/test_lfs_app.py b/vcsserver/git_lfs/tests/test_lfs_app.py --- a/vcsserver/git_lfs/tests/test_lfs_app.py +++ b/vcsserver/git_lfs/tests/test_lfs_app.py @@ -18,8 +18,9 @@ import os import pytest from webtest.app import TestApp as WebObTestApp -import simplejson as json +from vcsserver.lib.rc_json import json +from vcsserver.utils import safe_bytes from vcsserver.git_lfs.app import create_app @@ -121,7 +122,7 @@ class TestLFSApplication(object): if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) params = {'operation': 'download', 'objects': [{'oid': oid, 'size': '1024'}]} @@ -212,7 +213,7 @@ class TestLFSApplication(object): if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) params = {'oid': oid, 'size': '1024'} response = git_lfs_app.post_json( @@ -228,7 +229,7 @@ class TestLFSApplication(object): if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) params = {'oid': oid, 'size': 11} response = git_lfs_app.post_json( @@ -252,7 +253,7 @@ class TestLFSApplication(object): if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) response = git_lfs_app.get( '/repo/info/lfs/objects/{oid}'.format(oid=oid)) diff --git a/vcsserver/git_lfs/tests/test_lib.py b/vcsserver/git_lfs/tests/test_lib.py --- a/vcsserver/git_lfs/tests/test_lib.py +++ b/vcsserver/git_lfs/tests/test_lib.py @@ -17,6 +17,7 @@ import os import pytest +from vcsserver.utils import safe_bytes from vcsserver.git_lfs.lib import OidHandler, LFSOidStore @@ -70,7 +71,7 @@ class TestOidHandler(object): os.makedirs(os.path.dirname(store.oid_path)) with open(store.oid_path, 'wb') as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) response, has_errors = oid_handler.exec_operation('download') @@ -86,7 +87,7 @@ class TestOidHandler(object): os.makedirs(os.path.dirname(store.oid_path)) with open(store.oid_path, 'wb') as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) oid_handler.obj_size = 7 response, has_errors = oid_handler.exec_operation('upload') assert has_errors is None @@ -98,7 +99,7 @@ class TestOidHandler(object): os.makedirs(os.path.dirname(store.oid_path)) with open(store.oid_path, 'wb') as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) oid_handler.obj_size = 10240 response, has_errors = oid_handler.exec_operation('upload') @@ -127,7 +128,7 @@ class TestLFSStore(object): engine = lfs_store.get_engine(mode='wb') with engine as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) assert os.path.isfile(oid_location) @@ -136,6 +137,6 @@ class TestLFSStore(object): assert lfs_store.has_oid() is False engine = lfs_store.get_engine(mode='wb') with engine as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) - assert lfs_store.has_oid() is True \ No newline at end of file + assert lfs_store.has_oid() is True diff --git a/vcsserver/hook_utils/__init__.py b/vcsserver/hook_utils/__init__.py --- a/vcsserver/hook_utils/__init__.py +++ b/vcsserver/hook_utils/__init__.py @@ -25,6 +25,7 @@ import logging import pkg_resources import vcsserver +from vcsserver.utils import safe_bytes log = logging.getLogger(__name__) @@ -70,11 +71,10 @@ def install_git_hooks(repo_path, bare, e log.debug('writing git %s hook file at %s !', h_type, _hook_file) try: with open(_hook_file, 'wb') as f: - template = template.replace( - '_TMPL_', vcsserver.__version__) - template = template.replace('_DATE_', timestamp) - template = template.replace('_ENV_', executable) - template = template.replace('_PATH_', path) + template = template.replace(b'_TMPL_', safe_bytes(vcsserver.__version__)) + template = template.replace(b'_DATE_', safe_bytes(timestamp)) + template = template.replace(b'_ENV_', safe_bytes(executable)) + template = template.replace(b'_PATH_', safe_bytes(path)) f.write(template) os.chmod(_hook_file, 0o755) except IOError: @@ -124,11 +124,10 @@ def install_svn_hooks(repo_path, executa try: with open(_hook_file, 'wb') as f: - template = template.replace( - '_TMPL_', vcsserver.__version__) - template = template.replace('_DATE_', timestamp) - template = template.replace('_ENV_', executable) - template = template.replace('_PATH_', path) + template = template.replace(b'_TMPL_', safe_bytes(vcsserver.__version__)) + template = template.replace(b'_DATE_', safe_bytes(timestamp)) + template = template.replace(b'_ENV_', safe_bytes(executable)) + template = template.replace(b'_PATH_', safe_bytes(path)) f.write(template) os.chmod(_hook_file, 0o755) @@ -141,16 +140,16 @@ def install_svn_hooks(repo_path, executa def get_version_from_hook(hook_path): - version = '' + version = b'' hook_content = read_hook_content(hook_path) - matches = re.search(r'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content) + matches = re.search(rb'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content) if matches: try: version = matches.groups()[0] log.debug('got version %s from hooks.', version) except Exception: log.exception("Exception while reading the hook version.") - return version.replace("'", "") + return version.replace(b"'", b"") def check_rhodecode_hook(hook_path): diff --git a/vcsserver/hooks.py b/vcsserver/hooks.py --- a/vcsserver/hooks.py +++ b/vcsserver/hooks.py @@ -24,20 +24,23 @@ import logging import collections import importlib import base64 +import msgpack from http.client import HTTPConnection import mercurial.scmutil import mercurial.node -import simplejson as json +from vcsserver.lib.rc_json import json from vcsserver import exceptions, subprocessio, settings +from vcsserver.utils import safe_bytes log = logging.getLogger(__name__) class HooksHttpClient(object): + proto = 'msgpack.v1' connection = None def __init__(self, hooks_uri): @@ -45,30 +48,33 @@ class HooksHttpClient(object): def __call__(self, method, extras): connection = HTTPConnection(self.hooks_uri) - body = self._serialize(method, extras) + # binary msgpack body + headers, body = self._serialize(method, extras) try: - connection.request('POST', '/', body) - except Exception: - log.error('Hooks calling Connection failed on %s', connection.__dict__) + connection.request('POST', '/', body, headers) + except Exception as error: + log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error) raise response = connection.getresponse() - - response_data = response.read() - try: - return json.loads(response_data) + return msgpack.load(response, raw=False) except Exception: + response_data = response.read() log.exception('Failed to decode hook response json data. ' 'response_code:%s, raw_data:%s', response.status, response_data) raise - def _serialize(self, hook_name, extras): + @classmethod + def _serialize(cls, hook_name, extras): data = { 'method': hook_name, 'extras': extras } - return json.dumps(data) + headers = { + 'rc-hooks-protocol': cls.proto + } + return headers, msgpack.packb(data) class HooksDummyClient(object): @@ -113,7 +119,7 @@ class GitMessageWriter(RemoteMessageWrit self.stdout = stdout or sys.stdout def write(self, message): - self.stdout.write(message.encode('utf-8')) + self.stdout.write(safe_bytes(message)) class SvnMessageWriter(RemoteMessageWriter): @@ -439,15 +445,18 @@ def git_pre_pull(extras): :return: status code of the hook. 0 for success. :rtype: int """ + if 'pull' not in extras['hooks']: return HookResponse(0, '') stdout = io.BytesIO() try: status = _call_hook('pre_pull', extras, GitMessageWriter(stdout)) + except Exception as error: + log.exception('Failed to call pre_pull hook') status = 128 - stdout.write('ERROR: %s\n' % str(error)) + stdout.write(safe_bytes(f'ERROR: {error}\n')) return HookResponse(status, stdout.getvalue()) @@ -470,7 +479,7 @@ def git_post_pull(extras): status = _call_hook('post_pull', extras, GitMessageWriter(stdout)) except Exception as error: status = 128 - stdout.write('ERROR: %s\n' % error) + stdout.write(safe_bytes(f'ERROR: {error}\n')) return HookResponse(status, stdout.getvalue()) diff --git a/vcsserver/http_main.py b/vcsserver/http_main.py --- a/vcsserver/http_main.py +++ b/vcsserver/http_main.py @@ -15,6 +15,7 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +import io import os import sys import base64 @@ -28,9 +29,7 @@ import tempfile import psutil from itertools import chain -from io import StringIO -import simplejson as json import msgpack import configparser @@ -38,6 +37,7 @@ from pyramid.config import Configurator from pyramid.wsgi import wsgiapp from pyramid.response import Response +from vcsserver.lib.rc_json import json from vcsserver.config.settings_maker import SettingsMaker from vcsserver.utils import safe_int from vcsserver.lib.statsd_client import StatsdClient @@ -317,7 +317,8 @@ class HTTPApplication(object): def _vcs_view_params(self, request): remote = self._remotes[request.matchdict['backend']] - payload = msgpack.unpackb(request.body, use_list=True) + payload = msgpack.unpackb(request.body, use_list=True, raw=False) + method = payload.get('method') params = payload['params'] wire = params.get('wire') @@ -400,7 +401,7 @@ class HTTPApplication(object): resp = { 'id': payload_id, 'error': { - 'message': e.message, + 'message': str(e), 'traceback': tb_info, 'org_exc': org_exc_name, 'org_exc_tb': org_exc_tb, @@ -432,7 +433,7 @@ class HTTPApplication(object): raise def get_chunked_data(method_resp): - stream = StringIO(method_resp) + stream = io.BytesIO(method_resp) while 1: chunk = stream.read(chunk_size) if not chunk: @@ -577,7 +578,7 @@ class HTTPApplication(object): repo_name = environ['HTTP_X_RC_REPO_NAME'] packed_config = base64.b64decode( environ['HTTP_X_RC_REPO_CONFIG']) - config = msgpack.unpackb(packed_config) + config = msgpack.unpackb(packed_config, raw=False) environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO'] self.set_env_from_config(environ, config) diff --git a/vcsserver/lib/rc_cache/backends.py b/vcsserver/lib/rc_cache/backends.py --- a/vcsserver/lib/rc_cache/backends.py +++ b/vcsserver/lib/rc_cache/backends.py @@ -21,6 +21,7 @@ import logging import msgpack import redis +import pickle from dogpile.cache.api import CachedValue from dogpile.cache.backends import memory as memory_backend @@ -32,7 +33,7 @@ from dogpile.cache.util import memoized_ from pyramid.settings import asbool from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug -from vcsserver.utils import safe_str, safe_unicode +from vcsserver.utils import safe_str _default_max_size = 1024 @@ -265,7 +266,7 @@ class BaseRedisBackend(redis_backend.Red def get_mutex(self, key): if self.distributed_lock: - lock_key = '_lock_{0}'.format(safe_unicode(key)) + lock_key = '_lock_{0}'.format(safe_str(key)) return get_mutex_lock(self.client, lock_key, self._lock_timeout, auto_renewal=self._lock_auto_renewal) else: diff --git a/vcsserver/lib/rc_cache/utils.py b/vcsserver/lib/rc_cache/utils.py --- a/vcsserver/lib/rc_cache/utils.py +++ b/vcsserver/lib/rc_cache/utils.py @@ -23,7 +23,7 @@ import decorator from dogpile.cache import CacheRegion -from vcsserver.utils import safe_str, sha1 +from vcsserver.utils import safe_bytes, sha1 from vcsserver.lib.rc_cache import region_meta log = logging.getLogger(__name__) @@ -130,7 +130,7 @@ def compute_key_from_params(*args): """ Helper to compute key from given params to be used in cache manager """ - return sha1("_".join(map(safe_str, args))) + return sha1(safe_bytes("_".join(map(str, args)))) def backend_key_generator(backend): diff --git a/vcsserver/lib/rc_json.py b/vcsserver/lib/rc_json.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/rc_json.py @@ -0,0 +1,1 @@ +import simplejson as json diff --git a/vcsserver/pygrack.py b/vcsserver/pygrack.py --- a/vcsserver/pygrack.py +++ b/vcsserver/pygrack.py @@ -21,11 +21,13 @@ import os import socket import logging -import simplejson as json import dulwich.protocol +from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K from webob import Request, Response, exc +from vcsserver.lib.rc_json import json from vcsserver import hooks, subprocessio +from vcsserver.utils import ascii_bytes log = logging.getLogger(__name__) @@ -62,21 +64,20 @@ class FileWrapper(object): class GitRepository(object): """WSGI app for handling Git smart protocol endpoints.""" - git_folder_signature = frozenset( - ('config', 'head', 'info', 'objects', 'refs')) + git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs')) commands = frozenset(('git-upload-pack', 'git-receive-pack')) - valid_accepts = frozenset(('application/x-%s-result' % - c for c in commands)) + valid_accepts = frozenset(('application/x-{}-result'.format(c) for c in commands)) # The last bytes are the SHA1 of the first 12 bytes. EMPTY_PACK = ( - 'PACK\x00\x00\x00\x02\x00\x00\x00\x00' + - '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e' + b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' + + b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e' ) - SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k')) + FLUSH_PACKET = b"0000" - def __init__(self, repo_name, content_path, git_path, update_server_info, - extras): + SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K)) + + def __init__(self, repo_name, content_path, git_path, update_server_info, extras): files = frozenset(f.lower() for f in os.listdir(content_path)) valid_dir_signature = self.git_folder_signature.issubset(files) @@ -123,7 +124,7 @@ class GitRepository(object): # It reads binary, per number of bytes specified. # if you do add '\n' as part of data, count it. server_advert = '# service=%s\n' % git_command - packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower() + packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower() try: gitenv = dict(os.environ) # forget all configs @@ -133,15 +134,15 @@ class GitRepository(object): out = subprocessio.SubprocessIOChunker( command, env=gitenv, - starting_values=[packet_len + server_advert + '0000'], + starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET], shell=False ) - except EnvironmentError: + except OSError: log.exception('Error processing command') raise exc.HTTPExpectationFailed() resp = Response() - resp.content_type = 'application/x-%s-advertisement' % str(git_command) + resp.content_type = f'application/x-{git_command}-advertisement' resp.charset = None resp.app_iter = out @@ -166,34 +167,100 @@ class GitRepository(object): We also print in the error output a message explaining why the command was aborted. - If aditionally, the user is accepting messages we send them the output + If additionally, the user is accepting messages we send them the output of the pre-pull hook. Note that for clients not supporting side-band we just send them the emtpy PACK file. """ + if self.SIDE_BAND_CAPS.intersection(capabilities): response = [] proto = dulwich.protocol.Protocol(None, response.append) - proto.write_pkt_line('NAK\n') - self._write_sideband_to_proto(pre_pull_messages, proto, - capabilities) + proto.write_pkt_line(dulwich.protocol.NAK_LINE) + + self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities) # N.B.(skreft): Do not change the sideband channel to 3, as that # produces a fatal error in the client: # fatal: error in sideband demultiplexer - proto.write_sideband(2, 'Pre pull hook failed: aborting\n') - proto.write_sideband(1, self.EMPTY_PACK) + proto.write_sideband( + dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, + ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True)) + proto.write_sideband( + dulwich.protocol.SIDE_BAND_CHANNEL_DATA, + ascii_bytes(self.EMPTY_PACK, allow_bytes=True)) - # writes 0000 + # writes b"0000" as default proto.write_pkt_line(None) return response else: - return [self.EMPTY_PACK] + return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)] + + def _build_post_pull_response(self, response, capabilities, start_message, end_message): + """ + Given a list response we inject the post-pull messages. + + We only inject the messages if the client supports sideband, and the + response has the format: + 0008NAK\n...0000 + + Note that we do not check the no-progress capability as by default, git + sends it, which effectively would block all messages. + """ + + if not self.SIDE_BAND_CAPS.intersection(capabilities): + return response + + if not start_message and not end_message: + return response + + try: + iter(response) + # iterator probably will work, we continue + except TypeError: + raise TypeError(f'response must be an iterator: got {type(response)}') + if isinstance(response, (list, tuple)): + raise TypeError(f'response must be an iterator: got {type(response)}') + + def injected_response(): - def _write_sideband_to_proto(self, data, proto, capabilities): + do_loop = 1 + header_injected = 0 + next_item = None + has_item = False + while do_loop: + + try: + next_item = next(response) + except StopIteration: + do_loop = 0 + + if has_item: + # last item ! alter it now + if do_loop == 0 and item.endswith(self.FLUSH_PACKET): + new_response = [item[:-4]] + new_response.extend(self._get_messages(end_message, capabilities)) + new_response.append(self.FLUSH_PACKET) + item = b''.join(new_response) + + yield item + has_item = True + item = next_item + + # alter item if it's the initial chunk + if not header_injected and item.startswith(b'0008NAK\n'): + new_response = [b'0008NAK\n'] + new_response.extend(self._get_messages(start_message, capabilities)) + new_response.append(item[8:]) + item = b''.join(new_response) + header_injected = 1 + + return injected_response() + + def _write_sideband_to_proto(self, proto, data, capabilities): """ - Write the data to the proto's sideband number 2. + Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS We do not use dulwich's write_sideband directly as it only supports side-band-64k. @@ -204,68 +271,27 @@ class GitRepository(object): # N.B.(skreft): The values below are explained in the pack protocol # documentation, section Packfile Data. # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt - if 'side-band-64k' in capabilities: + if CAPABILITY_SIDE_BAND_64K in capabilities: chunk_size = 65515 - elif 'side-band' in capabilities: + elif CAPABILITY_SIDE_BAND in capabilities: chunk_size = 995 else: return - chunker = ( - data[i:i + chunk_size] for i in range(0, len(data), chunk_size)) + chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size)) for chunk in chunker: - proto.write_sideband(2, chunk) + proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True)) def _get_messages(self, data, capabilities): """Return a list with packets for sending data in sideband number 2.""" response = [] proto = dulwich.protocol.Protocol(None, response.append) - self._write_sideband_to_proto(data, proto, capabilities) + self._write_sideband_to_proto(proto, data, capabilities) return response - def _inject_messages_to_response(self, response, capabilities, - start_messages, end_messages): - """ - Given a list response we inject the pre/post-pull messages. - - We only inject the messages if the client supports sideband, and the - response has the format: - 0008NAK\n...0000 - - Note that we do not check the no-progress capability as by default, git - sends it, which effectively would block all messages. - """ - if not self.SIDE_BAND_CAPS.intersection(capabilities): - return response - - if not start_messages and not end_messages: - return response - - # make a list out of response if it's an iterator - # so we can investigate it for message injection. - if hasattr(response, '__iter__'): - response = list(response) - - if (not response[0].startswith('0008NAK\n') or - not response[-1].endswith('0000')): - return response - - new_response = ['0008NAK\n'] - new_response.extend(self._get_messages(start_messages, capabilities)) - if len(response) == 1: - new_response.append(response[0][8:-4]) - else: - new_response.append(response[0][8:]) - new_response.extend(response[1:-1]) - new_response.append(response[-1][:-4]) - new_response.extend(self._get_messages(end_messages, capabilities)) - new_response.append('0000') - - return new_response - def backend(self, request, environ): """ WSGI Response producer for HTTP POST Git Smart HTTP requests. @@ -304,11 +330,11 @@ class GitRepository(object): inputstream = request.body_file_seekable resp = Response() - resp.content_type = ('application/x-%s-result' % - git_command.encode('utf8')) + resp.content_type = 'application/x-{}-result'.format(git_command) resp.charset = None pre_pull_messages = '' + # Upload-pack == clone if git_command == 'git-upload-pack': status, pre_pull_messages = hooks.git_pre_pull(self.extras) if status != 0: @@ -326,7 +352,7 @@ class GitRepository(object): out = subprocessio.SubprocessIOChunker( cmd, - inputstream=inputstream, + input_stream=inputstream, env=gitenv, cwd=self.content_path, shell=False, @@ -346,7 +372,7 @@ class GitRepository(object): log.debug('handling cmd %s', cmd) output = subprocessio.SubprocessIOChunker( cmd, - inputstream=inputstream, + input_stream=inputstream, env=gitenv, cwd=self.content_path, shell=False, @@ -357,10 +383,11 @@ class GitRepository(object): for _ in output: pass + # Upload-pack == clone if git_command == 'git-upload-pack': unused_status, post_pull_messages = hooks.git_post_pull(self.extras) - resp.app_iter = self._inject_messages_to_response( - out, capabilities, pre_pull_messages, post_pull_messages) + + resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages) else: resp.app_iter = out diff --git a/vcsserver/remote/git.py b/vcsserver/remote/git.py --- a/vcsserver/remote/git.py +++ b/vcsserver/remote/git.py @@ -40,7 +40,7 @@ from dulwich.repo import Repo as Dulwich from dulwich.server import update_server_info from vcsserver import exceptions, settings, subprocessio -from vcsserver.utils import safe_str, safe_int, safe_unicode +from vcsserver.utils import safe_str, safe_int from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo from vcsserver.hgcompat import ( hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler) @@ -56,13 +56,6 @@ PEELED_REF_MARKER = '^{}' log = logging.getLogger(__name__) -def str_to_dulwich(value): - """ - Dulwich 0.10.1a requires `unicode` objects to be passed in. - """ - return value.decode(settings.WIRE_ENCODING) - - def reraise_safe_exceptions(func): """Converts Dulwich exceptions to something neutral.""" @@ -116,7 +109,7 @@ class GitFactory(RepoFactory): if use_libgit2: return Repository(wire['path']) else: - repo_path = str_to_dulwich(wire['path']) + repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING) return Repo(repo_path) def repo(self, wire, create=False, use_libgit2=False): @@ -160,7 +153,7 @@ class GitRemote(RemoteBase): def discover_git_version(self): stdout, _ = self.run_git_command( {}, ['--version'], _bare=True, _safe=True) - prefix = 'git version' + prefix = b'git version' if stdout.startswith(prefix): stdout = stdout[len(prefix):] return stdout.strip() @@ -186,6 +179,7 @@ class GitRemote(RemoteBase): def assert_correct_path(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _assert_correct_path(_context_uid, _repo_id): try: @@ -219,6 +213,7 @@ class GitRemote(RemoteBase): def blob_raw_length(self, wire, sha): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _blob_raw_length(_repo_id, _sha): @@ -230,10 +225,10 @@ class GitRemote(RemoteBase): return _blob_raw_length(repo_id, sha) def _parse_lfs_pointer(self, raw_content): + spec_string = b'version https://git-lfs.github.com/spec' + if raw_content and raw_content.startswith(spec_string): - spec_string = 'version https://git-lfs.github.com/spec' - if raw_content and raw_content.startswith(spec_string): - pattern = re.compile(r""" + pattern = re.compile(rb""" (?:\n)? ^version[ ]https://git-lfs\.github\.com/spec/(?Pv\d+)\n ^oid[ ] sha256:(?P[0-9a-f]{64})\n @@ -249,8 +244,8 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def is_large_file(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_large_file(_repo_id, _sha): repo_init = self._factory.repo_libgit2(wire) @@ -266,8 +261,8 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def is_binary(self, wire, tree_id): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_binary(_repo_id, _tree_id): repo_init = self._factory.repo_libgit2(wire) @@ -311,6 +306,7 @@ class GitRemote(RemoteBase): def bulk_request(self, wire, rev, pre_load): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _bulk_request(_repo_id, _rev, _pre_load): result = {} @@ -341,12 +337,12 @@ class GitRemote(RemoteBase): return urllib.request.build_opener(*handlers) - def _type_id_to_name(self, type_id): + def _type_id_to_name(self, type_id: int): return { - 1: b'commit', - 2: b'tree', - 3: b'blob', - 4: b'tag' + 1: 'commit', + 2: 'tree', + 3: 'blob', + 4: 'tag' }[type_id] @reraise_safe_exceptions @@ -674,7 +670,7 @@ class GitRemote(RemoteBase): for chunk in more_itertools.chunked(fetch_refs, 1024 * 4): fetch_refs_chunks = list(chunk) log.debug('Fetching %s refs from import url', len(fetch_refs_chunks)) - _out, _err = self.run_git_command( + self.run_git_command( wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks, fail_on_stderr=False, _copts=self._remote_conf(config), @@ -722,6 +718,7 @@ class GitRemote(RemoteBase): def get_object(self, wire, sha, maybe_unreachable=False): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_object(_context_uid, _repo_id, _sha): repo_init = self._factory.repo_libgit2(wire) @@ -766,7 +763,7 @@ class GitRemote(RemoteBase): raise exceptions.LookupException(e)(missing_commit_err) commit_id = commit.hex - type_id = commit.type_str + type_id = commit.type return { 'id': commit_id, @@ -781,6 +778,7 @@ class GitRemote(RemoteBase): def get_refs(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_refs(_context_uid, _repo_id): @@ -796,6 +794,7 @@ class GitRemote(RemoteBase): def get_branch_pointers(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_branch_pointers(_context_uid, _repo_id): @@ -811,6 +810,7 @@ class GitRemote(RemoteBase): def head(self, wire, show_exc=True): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _head(_context_uid, _repo_id, _show_exc): repo_init = self._factory.repo_libgit2(wire) @@ -837,6 +837,7 @@ class GitRemote(RemoteBase): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _revision(_context_uid, _repo_id, _rev): repo_init = self._factory.repo_libgit2(wire) @@ -856,6 +857,7 @@ class GitRemote(RemoteBase): def date(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _date(_repo_id, _commit_id): repo_init = self._factory.repo_libgit2(wire) @@ -876,6 +878,7 @@ class GitRemote(RemoteBase): def author(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _author(_repo_id, _commit_id): repo_init = self._factory.repo_libgit2(wire) @@ -893,7 +896,7 @@ class GitRemote(RemoteBase): try: return "{}".format(author.name) except Exception: - return "{}".format(safe_unicode(author.raw_name)) + return "{}".format(safe_str(author.raw_name)) return _author(repo_id, commit_id) @@ -930,6 +933,7 @@ class GitRemote(RemoteBase): def children(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _children(_repo_id, _commit_id): output, __ = self.run_git_command( @@ -990,6 +994,7 @@ class GitRemote(RemoteBase): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path): repo_init = self._factory.repo_libgit2(wire) @@ -1008,6 +1013,7 @@ class GitRemote(RemoteBase): def tree_items(self, wire, tree_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _tree_items(_repo_id, _tree_id): @@ -1066,7 +1072,7 @@ class GitRemote(RemoteBase): lines = diff.splitlines() x = 0 for line in lines: - if line.startswith('diff'): + if line.startswith(b'diff'): break x += 1 # Append new line just like 'diff' command do @@ -1110,6 +1116,7 @@ class GitRemote(RemoteBase): def node_history(self, wire, commit_id, path, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit): # optimize for n==1, rev-list is much faster for that use-case @@ -1122,14 +1129,14 @@ class GitRemote(RemoteBase): cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path]) output, __ = self.run_git_command(wire, cmd) - commit_ids = re.findall(r'[0-9a-fA-F]{40}', output) + commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output) return [x for x in commit_ids] return _node_history(context_uid, repo_id, commit_id, path, limit) @reraise_safe_exceptions - def node_annotate(self, wire, commit_id, path): - + def node_annotate_legacy(self, wire, commit_id, path): + #note: replaced by pygit2 impelementation cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path] # -l ==> outputs long shas (and we need all 40 characters) # --root ==> doesn't put '^' character for boundaries @@ -1137,13 +1144,31 @@ class GitRemote(RemoteBase): output, __ = self.run_git_command(wire, cmd) result = [] - for i, blame_line in enumerate(output.split('\n')[:-1]): + for i, blame_line in enumerate(output.splitlines()[:-1]): line_no = i + 1 - commit_id, line = re.split(r' ', blame_line, 1) - result.append((line_no, commit_id, line)) + blame_commit_id, line = re.split(rb' ', blame_line, 1) + result.append((line_no, blame_commit_id, line)) + return result @reraise_safe_exceptions + def node_annotate(self, wire, commit_id, path): + + result_libgit = [] + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blame_obj = repo.blame(path, newest_commit=commit_id) + for i, line in enumerate(commit.tree[path].data.splitlines()): + line_no = i + 1 + hunk = blame_obj.for_line(line_no) + blame_commit_id = hunk.final_commit_id.hex + + result_libgit.append((line_no, blame_commit_id, line)) + + return result_libgit + + @reraise_safe_exceptions def update_server_info(self, wire): repo = self._factory.repo(wire) update_server_info(repo) @@ -1153,6 +1178,7 @@ class GitRemote(RemoteBase): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_all_commit_ids(_context_uid, _repo_id): @@ -1163,6 +1189,16 @@ class GitRemote(RemoteBase): except Exception: # Can be raised for empty repositories return [] + + @region.conditional_cache_on_arguments(condition=cache_on) + def _get_all_commit_ids_pygit2(_context_uid, _repo_id): + repo_init = self._factory.repo_libgit2(wire) + from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL + results = [] + with repo_init as repo: + for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE): + results.append(commit.id.hex) + return _get_all_commit_ids(context_uid, repo_id) @reraise_safe_exceptions @@ -1203,8 +1239,8 @@ class GitRemote(RemoteBase): _opts.update(opts) proc = subprocessio.SubprocessIOChunker(cmd, **_opts) - return ''.join(proc), ''.join(proc.error) - except (EnvironmentError, OSError) as err: + return b''.join(proc), b''.join(proc.stderr) + except OSError as err: cmd = ' '.join(cmd) # human friendly CMD tb_err = ("Couldn't run git command (%s).\n" "Original error was:%s\n" diff --git a/vcsserver/remote/hg.py b/vcsserver/remote/hg.py --- a/vcsserver/remote/hg.py +++ b/vcsserver/remote/hg.py @@ -39,6 +39,7 @@ from vcsserver.hgcompat import ( patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError, RepoLookupError, InterventionRequired, RequirementError, alwaysmatcher, patternmatcher, hgutil, hgext_strip) +from vcsserver.utils import ascii_bytes, ascii_str, safe_str from vcsserver.vcs_base import RemoteBase log = logging.getLogger(__name__) @@ -47,25 +48,31 @@ log = logging.getLogger(__name__) def make_ui_from_config(repo_config): class LoggingUI(ui.ui): + def status(self, *msg, **opts): - log.info(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).status(*msg, **opts) + str_msg = map(safe_str, msg) + log.info(' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).status(*msg, **opts) def warn(self, *msg, **opts): - log.warn(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).warn(*msg, **opts) + str_msg = map(safe_str, msg) + log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).warn(*msg, **opts) def error(self, *msg, **opts): - log.error(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).error(*msg, **opts) + str_msg = map(safe_str, msg) + log.error('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).error(*msg, **opts) def note(self, *msg, **opts): - log.info(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).note(*msg, **opts) + str_msg = map(safe_str, msg) + log.info('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).note(*msg, **opts) def debug(self, *msg, **opts): - log.debug(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).debug(*msg, **opts) + str_msg = map(safe_str, msg) + log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).debug(*msg, **opts) baseui = LoggingUI() @@ -75,26 +82,26 @@ def make_ui_from_config(repo_config): baseui._tcfg = hgconfig.config() for section, option, value in repo_config: - baseui.setconfig(section, option, value) + baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value)) # make our hgweb quiet so it doesn't print output - baseui.setconfig('ui', 'quiet', 'true') + baseui.setconfig(b'ui', b'quiet', b'true') - baseui.setconfig('ui', 'paginate', 'never') + baseui.setconfig(b'ui', b'paginate', b'never') # for better Error reporting of Mercurial - baseui.setconfig('ui', 'message-output', 'stderr') + baseui.setconfig(b'ui', b'message-output', b'stderr') # force mercurial to only use 1 thread, otherwise it may try to set a # signal in a non-main thread, thus generating a ValueError. - baseui.setconfig('worker', 'numcpus', 1) + baseui.setconfig(b'worker', b'numcpus', 1) # If there is no config for the largefiles extension, we explicitly disable # it here. This overrides settings from repositories hgrc file. Recent # mercurial versions enable largefiles in hgrc on clone from largefile # repo. - if not baseui.hasconfig('extensions', 'largefiles'): + if not baseui.hasconfig(b'extensions', b'largefiles'): log.debug('Explicitly disable largefiles extension for repo.') - baseui.setconfig('extensions', 'largefiles', '!') + baseui.setconfig(b'extensions', b'largefiles', b'!') return baseui @@ -106,19 +113,19 @@ def reraise_safe_exceptions(func): try: return func(*args, **kwargs) except (Abort, InterventionRequired) as e: - raise_from_original(exceptions.AbortException(e)) + raise_from_original(exceptions.AbortException(e), e) except RepoLookupError as e: - raise_from_original(exceptions.LookupException(e)) + raise_from_original(exceptions.LookupException(e), e) except RequirementError as e: - raise_from_original(exceptions.RequirementException(e)) + raise_from_original(exceptions.RequirementException(e), e) except RepoError as e: - raise_from_original(exceptions.VcsException(e)) + raise_from_original(exceptions.VcsException(e), e) except LookupError as e: - raise_from_original(exceptions.LookupException(e)) + raise_from_original(exceptions.LookupException(e), e) except Exception as e: if not hasattr(e, '_vcs_kind'): log.exception("Unhandled exception in hg remote call") - raise_from_original(exceptions.UnhandledException(e)) + raise_from_original(exceptions.UnhandledException(e), e) raise return wrapper @@ -144,7 +151,7 @@ class MercurialFactory(RepoFactory): def _create_repo(self, wire, create): baseui = self._create_config(wire["config"]) - return instance(baseui, wire["path"], create) + return instance(baseui, ascii_bytes(wire["path"]), create) def repo(self, wire, create=False): """ @@ -154,7 +161,7 @@ class MercurialFactory(RepoFactory): def patch_ui_message_output(baseui): - baseui.setconfig('ui', 'quiet', 'false') + baseui.setconfig(b'ui', b'quiet', b'false') output = io.BytesIO() def write(data, **unused_kwargs): @@ -466,6 +473,7 @@ class HgRemote(RemoteBase): def node_history(self, wire, revision, path, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_history(_context_uid, _repo_id, _revision, _path, _limit): repo = self._factory.repo(wire) @@ -497,6 +505,7 @@ class HgRemote(RemoteBase): def node_history_untill(self, wire, revision, path, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_history_until(_context_uid, _repo_id): repo = self._factory.repo(wire) @@ -530,7 +539,7 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) fctx = ctx.filectx(path) - return fctx.data() + return fctx.data_queue() @reraise_safe_exceptions def fctx_flags(self, wire, commit_id, path): @@ -561,11 +570,12 @@ class HgRemote(RemoteBase): def get_all_commit_ids(self, wire, name): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_all_commit_ids(_context_uid, _repo_id, _name): repo = self._factory.repo(wire) repo = repo.filtered(name) - revs = [hex(x[7]) for x in repo.changelog.index] + revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()] return revs return _get_all_commit_ids(context_uid, repo_id, name) @@ -578,6 +588,7 @@ class HgRemote(RemoteBase): def is_large_file(self, wire, commit_id, path): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _is_large_file(_context_uid, _repo_id, _commit_id, _path): return largefiles.lfutil.isstandin(path) @@ -587,8 +598,8 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def is_binary(self, wire, revision, path): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_binary(_repo_id, _sha, _path): repo = self._factory.repo(wire) @@ -666,12 +677,12 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) # Disable any prompts for this repo - repo.ui.setconfig('ui', 'interactive', 'off', '-y') + repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y') bookmarks = list(dict(repo._bookmarks).keys()) remote = peer(repo, {}, url) # Disable any prompts for this remote - remote.ui.setconfig('ui', 'interactive', 'off', '-y') + remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y') return exchange.push( repo, remote, newbranch=True, bookmarks=bookmarks).cgresult @@ -878,11 +889,11 @@ class HgRemote(RemoteBase): def pull(self, wire, url, commit_ids=None): repo = self._factory.repo(wire) # Disable any prompts for this repo - repo.ui.setconfig('ui', 'interactive', 'off', '-y') + repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y') remote = peer(repo, {}, url) # Disable any prompts for this remote - remote.ui.setconfig('ui', 'interactive', 'off', '-y') + remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y') if commit_ids: commit_ids = [bin(commit_id) for commit_id in commit_ids] @@ -942,25 +953,25 @@ class HgRemote(RemoteBase): def merge(self, wire, revision): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig('ui', 'merge', 'internal:dump') + repo.ui.setconfig(b'ui', b'merge', b'internal:dump') # In case of sub repositories are used mercurial prompts the user in # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig('ui', 'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', False) commands.merge(baseui, repo, rev=revision) @reraise_safe_exceptions def merge_state(self, wire): repo = self._factory.repo(wire) - repo.ui.setconfig('ui', 'merge', 'internal:dump') + repo.ui.setconfig(b'ui', b'merge', b'internal:dump') # In case of sub repositories are used mercurial prompts the user in # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig('ui', 'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', False) ms = hg_merge.mergestate(repo) return [x for x in ms.unresolved()] @@ -968,19 +979,19 @@ class HgRemote(RemoteBase): def commit(self, wire, message, username, close_branch=False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig('ui', 'username', username) + repo.ui.setconfig(b'ui', b'username', username) commands.commit(baseui, repo, message=message, close_branch=close_branch) @reraise_safe_exceptions def rebase(self, wire, source=None, dest=None, abort=False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig('ui', 'merge', 'internal:dump') + repo.ui.setconfig(b'ui', b'merge', b'internal:dump') # In case of sub repositories are used mercurial prompts the user in # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig('ui', 'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', False) rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort) @reraise_safe_exceptions @@ -1039,7 +1050,7 @@ class HgRemote(RemoteBase): mode = b'x' in flags and 0o755 or 0o644 is_link = b'l' in flags - yield ArchiveNode(file_path, mode, is_link, ctx[fn].data) + yield ArchiveNode(file_path, mode, is_link, ctx[fn].data_queue) return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, archive_dir_name, commit_id) diff --git a/vcsserver/remote/svn.py b/vcsserver/remote/svn.py --- a/vcsserver/remote/svn.py +++ b/vcsserver/remote/svn.py @@ -407,14 +407,15 @@ class SvnRemote(RemoteBase): log.debug('Return process ended with code: %s', rdump.returncode) if rdump.returncode != 0: errors = rdump.stderr.read() - log.error('svnrdump dump failed: statuscode %s: message: %s', - rdump.returncode, errors) + log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors) + reason = 'UNKNOWN' - if 'svnrdump: E230001:' in errors: + if b'svnrdump: E230001:' in errors: reason = 'INVALID_CERTIFICATE' if reason == 'UNKNOWN': - reason = 'UNKNOWN:{}'.format(errors) + reason = 'UNKNOWN:{}'.format(safe_str(errors)) + raise Exception( 'Failed to dump the remote repository from %s. Reason:%s' % ( src_url, reason)) @@ -496,10 +497,10 @@ class SvnRemote(RemoteBase): try: _opts.update(opts) - p = subprocessio.SubprocessIOChunker(cmd, **_opts) + proc = subprocessio.SubprocessIOChunker(cmd, **_opts) - return ''.join(p), ''.join(p.error) - except (EnvironmentError, OSError) as err: + return b''.join(proc), b''.join(proc.stderr) + except OSError as err: if safe_call: return '', safe_str(err).strip() else: diff --git a/vcsserver/scm_app.py b/vcsserver/scm_app.py --- a/vcsserver/scm_app.py +++ b/vcsserver/scm_app.py @@ -27,7 +27,7 @@ import mercurial.hgweb.hgweb_mod import webob.exc from vcsserver import pygrack, exceptions, settings, git_lfs - +from vcsserver.utils import ascii_bytes log = logging.getLogger(__name__) @@ -115,10 +115,10 @@ def make_hg_ui_from_config(repo_config): baseui._tcfg = mercurial.config.config() for section, option, value in repo_config: - baseui.setconfig(section, option, value) + baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value)) # make our hgweb quiet so it doesn't print output - baseui.setconfig('ui', 'quiet', 'true') + baseui.setconfig(b'ui', b'quiet', b'true') return baseui @@ -135,7 +135,7 @@ def update_hg_ui_from_hgrc(baseui, repo_ for section in HG_UI_SECTIONS: for k, v in cfg.items(section): log.debug('settings ui from file: [%s] %s=%s', section, k, v) - baseui.setconfig(section, k, v) + baseui.setconfig(ascii_bytes(section), ascii_bytes(k), ascii_bytes(v)) def create_hg_wsgi_app(repo_path, repo_name, config): @@ -225,10 +225,10 @@ class GitLFSHandler(object): def create_git_lfs_wsgi_app(repo_path, repo_name, config): git_path = settings.GIT_EXECUTABLE - update_server_info = config.pop('git_update_server_info') - git_lfs_enabled = config.pop('git_lfs_enabled') - git_lfs_store_path = config.pop('git_lfs_store_path') - git_lfs_http_scheme = config.pop('git_lfs_http_scheme', 'http') + update_server_info = config.pop(b'git_update_server_info') + git_lfs_enabled = config.pop(b'git_lfs_enabled') + git_lfs_store_path = config.pop(b'git_lfs_store_path') + git_lfs_http_scheme = config.pop(b'git_lfs_http_scheme', 'http') app = GitLFSHandler( repo_path, repo_name, git_path, update_server_info, config) diff --git a/vcsserver/subprocessio.py b/vcsserver/subprocessio.py --- a/vcsserver/subprocessio.py +++ b/vcsserver/subprocessio.py @@ -23,15 +23,15 @@ along with git_http_backend.py Project. If not, see . """ import os +import collections import logging import subprocess -from collections import deque -from threading import Event, Thread +import threading log = logging.getLogger(__name__) -class StreamFeeder(Thread): +class StreamFeeder(threading.Thread): """ Normal writing into pipe-like is blocking once the buffer is filled. This thread allows a thread to seep data from a file-like into a pipe @@ -47,17 +47,11 @@ class StreamFeeder(Thread): if type(source) in (type(''), bytes, bytearray): # string-like self.bytes = bytes(source) else: # can be either file pointer or file-like - if type(source) in (int, int): # file pointer it is + if isinstance(source, int): # file pointer it is # converting file descriptor (int) stdin into file-like - try: - source = os.fdopen(source, 'rb', 16384) - except Exception: - pass + source = os.fdopen(source, 'rb', 16384) # let's see if source is file-like by now - try: - filelike = source.read - except Exception: - pass + filelike = hasattr(source, 'read') if not filelike and not self.bytes: raise TypeError("StreamFeeder's source object must be a readable " "file-like, a file descriptor, or a string-like.") @@ -65,25 +59,28 @@ class StreamFeeder(Thread): self.readiface, self.writeiface = os.pipe() def run(self): - t = self.writeiface + writer = self.writeiface try: if self.bytes: - os.write(t, self.bytes) + os.write(writer, self.bytes) else: s = self.source - b = s.read(4096) - while b: - os.write(t, b) - b = s.read(4096) + + while 1: + _bytes = s.read(4096) + if not _bytes: + break + os.write(writer, _bytes) + finally: - os.close(t) + os.close(writer) @property def output(self): return self.readiface -class InputStreamChunker(Thread): +class InputStreamChunker(threading.Thread): def __init__(self, source, target, buffer_size, chunk_size): super(InputStreamChunker, self).__init__() @@ -95,16 +92,16 @@ class InputStreamChunker(Thread): self.chunk_count_max = int(buffer_size / chunk_size) + 1 self.chunk_size = chunk_size - self.data_added = Event() + self.data_added = threading.Event() self.data_added.clear() - self.keep_reading = Event() + self.keep_reading = threading.Event() self.keep_reading.set() - self.EOF = Event() + self.EOF = threading.Event() self.EOF.clear() - self.go = Event() + self.go = threading.Event() self.go.set() def stop(self): @@ -146,7 +143,7 @@ class InputStreamChunker(Thread): try: b = s.read(cs) - except ValueError: + except ValueError: # probably "I/O operation on closed file" b = '' self.EOF.set() @@ -166,18 +163,20 @@ class BufferedGenerator(object): StopIteration after the last chunk of data is yielded. """ - def __init__(self, source, buffer_size=65536, chunk_size=4096, + def __init__(self, name, source, buffer_size=65536, chunk_size=4096, starting_values=None, bottomless=False): starting_values = starting_values or [] + self.name = name + self.buffer_size = buffer_size + self.chunk_size = chunk_size if bottomless: maxlen = int(buffer_size / chunk_size) else: maxlen = None - self.data = deque(starting_values, maxlen) - self.worker = InputStreamChunker(source, self.data, buffer_size, - chunk_size) + self.data_queue = collections.deque(starting_values, maxlen) + self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size) if starting_values: self.worker.data_added.set() self.worker.start() @@ -185,17 +184,21 @@ class BufferedGenerator(object): #################### # Generator's methods #################### + def __str__(self): + return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})' def __iter__(self): return self def __next__(self): - while not len(self.data) and not self.worker.EOF.is_set(): + + while not self.length and not self.worker.EOF.is_set(): self.worker.data_added.clear() self.worker.data_added.wait(0.2) - if len(self.data): + + if self.length: self.worker.keep_reading.set() - return bytes(self.data.popleft()) + return bytes(self.data_queue.popleft()) elif self.worker.EOF.is_set(): raise StopIteration @@ -249,7 +252,7 @@ class BufferedGenerator(object): @property def done_reading(self): """ - Done_reding does not mean that the iterator's buffer is empty. + Done_reading does not mean that the iterator's buffer is empty. Iterator might have done reading from underlying source, but the read chunks might still be available for serving through .next() method. @@ -262,31 +265,31 @@ class BufferedGenerator(object): """ returns int. - This is the lenght of the que of chunks, not the length of + This is the length of the queue of chunks, not the length of the combined contents in those chunks. __len__() cannot be meaningfully implemented because this - reader is just flying throuh a bottomless pit content and - can only know the lenght of what it already saw. + reader is just flying through a bottomless pit content and + can only know the length of what it already saw. If __len__() on WSGI server per PEP 3333 returns a value, - the responce's length will be set to that. In order not to + the response's length will be set to that. In order not to confuse WSGI PEP3333 servers, we will not implement __len__ at all. """ - return len(self.data) + return len(self.data_queue) def prepend(self, x): - self.data.appendleft(x) + self.data_queue.appendleft(x) def append(self, x): - self.data.append(x) + self.data_queue.append(x) def extend(self, o): - self.data.extend(o) + self.data_queue.extend(o) def __getitem__(self, i): - return self.data[i] + return self.data_queue[i] class SubprocessIOChunker(object): @@ -314,7 +317,7 @@ class SubprocessIOChunker(object): - We are multithreaded. Writing in and reading out, err are all sep threads. - We support concurrent (in and out) stream processing. - - The output is not a stream. It's a queue of read string (bytes, not unicode) + - The output is not a stream. It's a queue of read string (bytes, not str) chunks. The object behaves as an iterable. You can "for chunk in obj:" us. - We are non-blocking in more respects than communicate() (reading from subprocess out pauses when internal buffer is full, but @@ -323,16 +326,16 @@ class SubprocessIOChunker(object): does not block the parallel inpipe reading occurring parallel thread.) The purpose of the object is to allow us to wrap subprocess interactions into - and interable that can be passed to a WSGI server as the application's return + an iterable that can be passed to a WSGI server as the application's return value. Because of stream-processing-ability, WSGI does not have to read ALL of the subprocess's output and buffer it, before handing it to WSGI server for HTTP response. Instead, the class initializer reads just a bit of the stream - to figure out if error ocurred or likely to occur and if not, just hands the + to figure out if error occurred or likely to occur and if not, just hands the further iteration over subprocess output to the server for completion of HTTP response. The real or perceived subprocess error is trapped and raised as one of - EnvironmentError family of exceptions + OSError family of exceptions Example usage: # try: @@ -342,7 +345,7 @@ class SubprocessIOChunker(object): # buffer_size = 65536, # chunk_size = 4096 # ) - # except (EnvironmentError) as e: + # except (OSError) as e: # print str(e) # raise e # @@ -358,15 +361,17 @@ class SubprocessIOChunker(object): _close_input_fd = None _closed = False + _stdout = None + _stderr = None - def __init__(self, cmd, inputstream=None, buffer_size=65536, + def __init__(self, cmd, input_stream=None, buffer_size=65536, chunk_size=4096, starting_values=None, fail_on_stderr=True, fail_on_return_code=True, **kwargs): """ Initializes SubprocessIOChunker :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings - :param inputstream: (Default: None) A file-like, string, or file pointer. + :param input_stream: (Default: None) A file-like, string, or file pointer. :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes. :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller. :param starting_values: (Default: []) An array of strings to put in front of output que. @@ -376,66 +381,81 @@ class SubprocessIOChunker(object): exception if the return code is not 0. """ + kwargs['shell'] = kwargs.get('shell', True) + starting_values = starting_values or [] - if inputstream: - input_streamer = StreamFeeder(inputstream) + if input_stream: + input_streamer = StreamFeeder(input_stream) input_streamer.start() - inputstream = input_streamer.output - self._close_input_fd = inputstream + input_stream = input_streamer.output + self._close_input_fd = input_stream self._fail_on_stderr = fail_on_stderr self._fail_on_return_code = fail_on_return_code - - _shell = kwargs.get('shell', True) - kwargs['shell'] = _shell + self.cmd = cmd - _p = subprocess.Popen(cmd, bufsize=-1, - stdin=inputstream, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs) + self.process = _p - bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size, - starting_values) - bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True) + bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values) + bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True) while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length: # doing this until we reach either end of file, or end of buffer. - bg_out.data_added_event.wait(1) + bg_out.data_added_event.wait(0.2) bg_out.data_added_event.clear() # at this point it's still ambiguous if we are done reading or just full buffer. # Either way, if error (returned by ended process, or implied based on # presence of stuff in stderr output) we error out. # Else, we are happy. - _returncode = _p.poll() + return_code = _p.poll() + ret_code_ok = return_code in [None, 0] + ret_code_fail = return_code is not None and return_code != 0 + if ( + (ret_code_fail and fail_on_return_code) or + (ret_code_ok and fail_on_stderr and bg_err.length) + ): - if ((_returncode and fail_on_return_code) or - (fail_on_stderr and _returncode is None and bg_err.length)): try: _p.terminate() except Exception: pass + bg_out.stop() + out = b''.join(bg_out) + self._stdout = out + bg_err.stop() - if fail_on_stderr: - err = ''.join(bg_err) - raise EnvironmentError( - "Subprocess exited due to an error:\n" + err) - if _returncode and fail_on_return_code: - err = ''.join(bg_err) + err = b''.join(bg_err) + self._stderr = err + + # code from https://github.com/schacon/grack/pull/7 + if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '): + bg_out = iter([out]) + _p = None + elif err and fail_on_stderr: + text_err = err.decode() + raise OSError( + "Subprocess exited due to an error:\n{}".format(text_err)) + + if ret_code_fail and fail_on_return_code: + text_err = err.decode() if not err: # maybe get empty stderr, try stdout instead # in many cases git reports the errors on stdout too - err = ''.join(bg_out) - raise EnvironmentError( - "Subprocess exited with non 0 ret code:%s: stderr:%s" % ( - _returncode, err)) + text_err = out.decode() + raise OSError( + "Subprocess exited with non 0 ret code:{}: stderr:{}".format(return_code, text_err)) - self.process = _p - self.output = bg_out - self.error = bg_err - self.inputstream = inputstream + self.stdout = bg_out + self.stderr = bg_err + self.inputstream = input_stream + + def __str__(self): + proc = getattr(self, 'process', 'NO_PROCESS') + return f'SubprocessIOChunker: {proc}' def __iter__(self): return self @@ -449,27 +469,31 @@ class SubprocessIOChunker(object): result = None stop_iteration = None try: - result = next(self.output) + result = next(self.stdout) except StopIteration as e: stop_iteration = e - if self.process.poll() and self._fail_on_return_code: - err = '%s' % ''.join(self.error) - raise EnvironmentError( - "Subprocess exited due to an error:\n" + err) + if self.process: + return_code = self.process.poll() + ret_code_fail = return_code is not None and return_code != 0 + if ret_code_fail and self._fail_on_return_code: + self.stop_streams() + err = self.get_stderr() + raise OSError( + "Subprocess exited (exit_code:{}) due to an error during iteration:\n{}".format(return_code, err)) if stop_iteration: raise stop_iteration return result - def throw(self, type, value=None, traceback=None): - if self.output.length or not self.output.done_reading: - raise type(value) + def throw(self, exc_type, value=None, traceback=None): + if self.stdout.length or not self.stdout.done_reading: + raise exc_type(value) def close(self): if self._closed: return - self._closed = True + try: self.process.terminate() except Exception: @@ -477,11 +501,11 @@ class SubprocessIOChunker(object): if self._close_input_fd: os.close(self._close_input_fd) try: - self.output.close() + self.stdout.close() except Exception: pass try: - self.error.close() + self.stderr.close() except Exception: pass try: @@ -489,6 +513,24 @@ class SubprocessIOChunker(object): except Exception: pass + self._closed = True + + def stop_streams(self): + getattr(self.stdout, 'stop', lambda: None)() + getattr(self.stderr, 'stop', lambda: None)() + + def get_stdout(self): + if self._stdout: + return self._stdout + else: + return b''.join(self.stdout) + + def get_stderr(self): + if self._stderr: + return self._stderr + else: + return b''.join(self.stderr) + def run_command(arguments, env=None): """ @@ -506,8 +548,8 @@ def run_command(arguments, env=None): if env: _opts.update({'env': env}) proc = SubprocessIOChunker(cmd, **_opts) - return ''.join(proc), ''.join(proc.error) - except (EnvironmentError, OSError) as err: + return b''.join(proc), b''.join(proc.stderr) + except OSError as err: cmd = ' '.join(cmd) # human friendly CMD tb_err = ("Couldn't run subprocessio command (%s).\n" "Original error was:%s\n" % (cmd, err)) diff --git a/vcsserver/tests/test_git.py b/vcsserver/tests/test_git.py --- a/vcsserver/tests/test_git.py +++ b/vcsserver/tests/test_git.py @@ -48,7 +48,7 @@ def test_discover_git_version(git_remote class TestGitFetch(object): - def setup(self): + def setup_method(self): self.mock_repo = Mock() factory = Mock() factory.repo = Mock(return_value=self.mock_repo) @@ -92,7 +92,7 @@ class TestGitFetch(object): 'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e', } - with patch('vcsserver.git.Repo', create=False) as mock_repo: + with patch('vcsserver.remote.git.Repo', create=False) as mock_repo: mock_repo().get_refs.return_value = sample_refs remote_refs = remote_git.get_remote_refs(wire={}, url=url) mock_repo().get_refs.assert_called_once_with() @@ -137,11 +137,14 @@ class TestReraiseSafeExceptions(object): class TestDulwichRepoWrapper(object): def test_calls_close_on_delete(self): isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True) - with isdir_patcher: - repo = git.Repo('/tmp/abcde') - with patch.object(git.DulwichRepo, 'close') as close_mock: - del repo - close_mock.assert_called_once_with() + with patch.object(git.Repo, 'close') as close_mock: + with isdir_patcher: + repo = git.Repo('/tmp/abcde') + assert repo is not None + repo.__del__() + # can't use del repo as in python3 this isn't always calling .__del__() + + close_mock.assert_called_once_with() class TestGitFactory(object): diff --git a/vcsserver/tests/test_hg.py b/vcsserver/tests/test_hg.py --- a/vcsserver/tests/test_hg.py +++ b/vcsserver/tests/test_hg.py @@ -33,8 +33,8 @@ class TestDiff(object): factory = Mock() hg_remote = hg.HgRemote(factory) with patch('mercurial.patch.diff') as diff_mock: - diff_mock.side_effect = LookupError( - 'deadbeef', 'index', 'message') + diff_mock.side_effect = LookupError(b'deadbeef', b'index', b'message') + with pytest.raises(Exception) as exc_info: hg_remote.diff( wire={}, commit_id_1='deadbeef', commit_id_2='deadbee1', @@ -55,10 +55,10 @@ class TestReraiseSafeExceptions(object): assert method.__func__.__code__ == decorator.__code__ @pytest.mark.parametrize('side_effect, expected_type', [ - (hgcompat.Abort(), 'abort'), - (hgcompat.InterventionRequired(), 'abort'), + (hgcompat.Abort('failed-abort'), 'abort'), + (hgcompat.InterventionRequired('intervention-required'), 'abort'), (hgcompat.RepoLookupError(), 'lookup'), - (hgcompat.LookupError('deadbeef', 'index', 'message'), 'lookup'), + (hgcompat.LookupError(b'deadbeef', b'index', b'message'), 'lookup'), (hgcompat.RepoError(), 'error'), (hgcompat.RequirementError(), 'requirement'), ]) @@ -76,10 +76,9 @@ class TestReraiseSafeExceptions(object): @hg.reraise_safe_exceptions def fake_method(): try: - raise hgcompat.Abort() + raise hgcompat.Abort('test-abort') except: - self.original_traceback = traceback.format_tb( - sys.exc_info()[2]) + self.original_traceback = traceback.format_tb(sys.exc_info()[2]) raise try: diff --git a/vcsserver/tests/test_hgpatches.py b/vcsserver/tests/test_hgpatches.py --- a/vcsserver/tests/test_hgpatches.py +++ b/vcsserver/tests/test_hgpatches.py @@ -21,7 +21,7 @@ import pytest from vcsserver import hgcompat, hgpatches -LARGEFILES_CAPABILITY = 'largefiles=serve' +LARGEFILES_CAPABILITY = b'largefiles=serve' def test_patch_largefiles_capabilities_applies_patch( @@ -72,11 +72,6 @@ def test_dynamic_capabilities_uses_large assert LARGEFILES_CAPABILITY in caps -def test_hgsubversion_import(): - from hgsubversion import svnrepo - assert svnrepo - - @pytest.fixture def patched_capabilities(request): """ diff --git a/vcsserver/tests/test_hooks.py b/vcsserver/tests/test_hooks.py --- a/vcsserver/tests/test_hooks.py +++ b/vcsserver/tests/test_hooks.py @@ -15,17 +15,17 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -import contextlib -import io import threading +import msgpack + from http.server import BaseHTTPRequestHandler from socketserver import TCPServer import mercurial.ui import mock import pytest -import simplejson as json +from vcsserver.lib.rc_json import json from vcsserver import hooks @@ -44,7 +44,7 @@ def get_hg_ui(extras=None): } required_extras.update(extras) hg_ui = mercurial.ui.ui() - hg_ui.setconfig('rhodecode', 'RC_SCM_DATA', json.dumps(required_extras)) + hg_ui.setconfig(b'rhodecode', b'RC_SCM_DATA', json.dumps(required_extras)) return hg_ui @@ -67,6 +67,7 @@ def test_git_post_receive_is_disabled(): def test_git_post_receive_calls_repo_size(): extras = {'hooks': ['push', 'repo_size']} + with mock.patch.object(hooks, '_call_hook') as call_hook_mock: hooks.git_post_receive( None, '', {'RC_SCM_DATA': json.dumps(extras)}) @@ -81,6 +82,7 @@ def test_git_post_receive_calls_repo_siz def test_git_post_receive_does_not_call_disabled_repo_size(): extras = {'hooks': ['push']} + with mock.patch.object(hooks, '_call_hook') as call_hook_mock: hooks.git_post_receive( None, '', {'RC_SCM_DATA': json.dumps(extras)}) @@ -149,18 +151,19 @@ class TestHooksHttpClient(object): client = hooks.HooksHttpClient(uri) assert client.hooks_uri == uri - def test_serialize_returns_json_string(self): + def test_serialize_returns_serialized_string(self): client = hooks.HooksHttpClient('localhost:3000') hook_name = 'test' extras = { 'first': 1, 'second': 'two' } - result = client._serialize(hook_name, extras) - expected_result = json.dumps({ + hooks_proto, result = client._serialize(hook_name, extras) + expected_result = msgpack.packb({ 'method': hook_name, - 'extras': extras + 'extras': extras, }) + assert hooks_proto == {'rc-hooks-protocol': 'msgpack.v1'} assert result == expected_result def test_call_queries_http_server(self, http_mirror): @@ -171,10 +174,10 @@ class TestHooksHttpClient(object): 'second': 'two' } result = client(hook_name, extras) - expected_result = { + expected_result = msgpack.unpackb(msgpack.packb({ 'method': hook_name, 'extras': extras - } + }), raw=False) assert result == expected_result @@ -211,9 +214,10 @@ def http_mirror(request): class MirrorHttpHandler(BaseHTTPRequestHandler): + def do_POST(self): length = int(self.headers['Content-Length']) - body = self.rfile.read(length).decode('utf-8') + body = self.rfile.read(length) self.send_response(200) self.end_headers() self.wfile.write(body) diff --git a/vcsserver/tests/test_http_performance.py b/vcsserver/tests/test_http_performance.py --- a/vcsserver/tests/test_http_performance.py +++ b/vcsserver/tests/test_http_performance.py @@ -30,13 +30,13 @@ def data(): def test_http_app_streaming_with_data(data, repeat, vcs_app): app = vcs_app - for x in range(repeat / 10): + for x in range(repeat // 10): response = app.post('/stream/git/', params=data) assert response.status_code == 200 def test_http_app_streaming_no_data(repeat, vcs_app): app = vcs_app - for x in range(repeat / 10): + for x in range(repeat // 10): response = app.post('/stream/git/') assert response.status_code == 200 diff --git a/vcsserver/tests/test_install_hooks.py b/vcsserver/tests/test_install_hooks.py --- a/vcsserver/tests/test_install_hooks.py +++ b/vcsserver/tests/test_install_hooks.py @@ -23,7 +23,7 @@ import vcsserver import tempfile from vcsserver import hook_utils from vcsserver.tests.fixture import no_newline_id_generator -from vcsserver.utils import AttributeDict +from vcsserver.utils import AttributeDict, safe_bytes, safe_str class TestCheckRhodecodeHook(object): @@ -31,7 +31,7 @@ class TestCheckRhodecodeHook(object): def test_returns_false_when_hook_file_is_wrong_found(self, tmpdir): hook = os.path.join(str(tmpdir), 'fake_hook_file.py') with open(hook, 'wb') as f: - f.write('dummy test') + f.write(b'dummy test') result = hook_utils.check_rhodecode_hook(hook) assert result is False @@ -47,7 +47,7 @@ class TestCheckRhodecodeHook(object): def test_signatures(self, file_content, expected_result, tmpdir): hook = os.path.join(str(tmpdir), 'fake_hook_file_1.py') with open(hook, 'wb') as f: - f.write(file_content) + f.write(safe_bytes(file_content)) result = hook_utils.check_rhodecode_hook(hook) @@ -71,8 +71,7 @@ class BaseInstallHooks(object): content = hook_file.read() expected_env = '#!{}'.format(executable) - expected_rc_version = "\nRC_HOOK_VER = '{}'\n".format( - vcsserver.__version__) + expected_rc_version = "\nRC_HOOK_VER = '{}'\n".format(safe_str(vcsserver.__version__)) assert content.strip().startswith(expected_env) assert expected_rc_version in content diff --git a/vcsserver/tests/test_main_http.py b/vcsserver/tests/test_main_http.py --- a/vcsserver/tests/test_main_http.py +++ b/vcsserver/tests/test_main_http.py @@ -42,8 +42,7 @@ def test_applies_largefiles_patch_only_i ('bad', 'bad'), ('query&foo=bar', 'query&foo=bar'), ('equery&auth_token=bar', 'equery&auth_token=*****'), - ('a;b;c;query&foo=bar&auth_token=secret', - 'a&b&c&query&foo=bar&auth_token=*****'), + ('a;b;c;query&foo=bar&auth_token=secret', 'a;b;c;query&foo=bar&auth_token=*****'), ('', ''), (None, None), ('foo=bar', 'foo=bar'), diff --git a/vcsserver/tests/test_pygrack.py b/vcsserver/tests/test_pygrack.py --- a/vcsserver/tests/test_pygrack.py +++ b/vcsserver/tests/test_pygrack.py @@ -16,6 +16,7 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import io +import more_itertools import dulwich.protocol import mock @@ -26,6 +27,7 @@ import webtest from vcsserver import hooks, pygrack # pylint: disable=redefined-outer-name,protected-access +from vcsserver.utils import ascii_bytes @pytest.fixture() @@ -75,8 +77,7 @@ def test_pre_pull_hook_fails_with_sideba '0000', '0009done\n', ]) - with mock.patch('vcsserver.hooks.git_pre_pull', - return_value=hooks.HookResponse(1, 'foo')): + with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')): response = pygrack_app.post( '/git-upload-pack', params=request, content_type='application/x-git-upload-pack') @@ -86,8 +87,8 @@ def test_pre_pull_hook_fails_with_sideba packets = list(proto.read_pkt_seq()) expected_packets = [ - 'NAK\n', '\x02foo', '\x02Pre pull hook failed: aborting\n', - '\x01' + pygrack.GitRepository.EMPTY_PACK, + b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n', + b'\x01' + pygrack.GitRepository.EMPTY_PACK, ] assert packets == expected_packets @@ -120,7 +121,7 @@ def test_pull_has_hook_messages(pygrack_ with mock.patch('vcsserver.hooks.git_post_pull', return_value=hooks.HookResponse(1, 'bar')): with mock.patch('vcsserver.subprocessio.SubprocessIOChunker', - return_value=['0008NAK\n0009subp\n0000']): + return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])): response = pygrack_app.post( '/git-upload-pack', params=request, content_type='application/x-git-upload-pack') @@ -129,13 +130,13 @@ def test_pull_has_hook_messages(pygrack_ proto = dulwich.protocol.Protocol(data.read, None) packets = list(proto.read_pkt_seq()) - assert packets == ['NAK\n', '\x02foo', 'subp\n', '\x02bar'] + assert packets == [b'NAK\n', b'\x02foo', b'subp\n', b'\x02bar'] def test_get_want_capabilities(pygrack_instance): data = io.BytesIO( - '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' + - 'multi_ack side-band-64k ofs-delta\n00000009done\n') + b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' + + b'multi_ack side-band-64k ofs-delta\n00000009done\n') request = webob.Request({ 'wsgi.input': data, @@ -146,20 +147,20 @@ def test_get_want_capabilities(pygrack_i capabilities = pygrack_instance._get_want_capabilities(request) assert capabilities == frozenset( - ('ofs-delta', 'multi_ack', 'side-band-64k')) + (b'ofs-delta', b'multi_ack', b'side-band-64k')) assert data.tell() == 0 @pytest.mark.parametrize('data,capabilities,expected', [ ('foo', [], []), - ('', ['side-band-64k'], []), - ('', ['side-band'], []), - ('foo', ['side-band-64k'], ['0008\x02foo']), - ('foo', ['side-band'], ['0008\x02foo']), - ('f'*1000, ['side-band-64k'], ['03ed\x02' + 'f' * 1000]), - ('f'*1000, ['side-band'], ['03e8\x02' + 'f' * 995, '000a\x02fffff']), - ('f'*65520, ['side-band-64k'], ['fff0\x02' + 'f' * 65515, '000a\x02fffff']), - ('f'*65520, ['side-band'], ['03e8\x02' + 'f' * 995] * 65 + ['0352\x02' + 'f' * 845]), + ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []), + ('', [pygrack.CAPABILITY_SIDE_BAND], []), + ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']), + ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']), + ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]), + ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']), + ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']), + ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]), ], ids=[ 'foo-empty', 'empty-64k', 'empty', @@ -174,54 +175,59 @@ def test_get_messages(pygrack_instance, @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [ # Unexpected response - ('unexpected_response', ['side-band-64k'], 'foo', 'bar'), + ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'), # No sideband - ('no-sideband', [], 'foo', 'bar'), + ([b'no-sideband'], [], 'foo', 'bar'), # No messages - ('no-messages', ['side-band-64k'], '', ''), + ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''), ]) def test_inject_messages_to_response_nothing_to_do( - pygrack_instance, response, capabilities, pre_pull_messages, - post_pull_messages): - new_response = pygrack_instance._inject_messages_to_response( - response, capabilities, pre_pull_messages, post_pull_messages) + pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages): - assert new_response == response + new_response = pygrack_instance._build_post_pull_response( + more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages) + + assert list(new_response) == response @pytest.mark.parametrize('capabilities', [ - ['side-band'], - ['side-band-64k'], + [pygrack.CAPABILITY_SIDE_BAND], + [pygrack.CAPABILITY_SIDE_BAND_64K], ]) -def test_inject_messages_to_response_single_element(pygrack_instance, - capabilities): - response = ['0008NAK\n0009subp\n0000'] - new_response = pygrack_instance._inject_messages_to_response( - response, capabilities, 'foo', 'bar') +def test_inject_messages_to_response_single_element(pygrack_instance, capabilities): + response = [b'0008NAK\n0009subp\n0000'] + new_response = pygrack_instance._build_post_pull_response( + more_itertools.always_iterable(response), capabilities, 'foo', 'bar') - expected_response = [ - '0008NAK\n', '0008\x02foo', '0009subp\n', '0008\x02bar', '0000'] + expected_response = b''.join([ + b'0008NAK\n', + b'0008\x02foo', + b'0009subp\n', + b'0008\x02bar', + b'0000']) - assert new_response == expected_response + assert b''.join(new_response) == expected_response @pytest.mark.parametrize('capabilities', [ - ['side-band'], - ['side-band-64k'], + [pygrack.CAPABILITY_SIDE_BAND], + [pygrack.CAPABILITY_SIDE_BAND_64K], ]) -def test_inject_messages_to_response_multi_element(pygrack_instance, - capabilities): - response = [ - '0008NAK\n000asubp1\n', '000asubp2\n', '000asubp3\n', '000asubp4\n0000'] - new_response = pygrack_instance._inject_messages_to_response( - response, capabilities, 'foo', 'bar') +def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities): + response = more_itertools.always_iterable([ + b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000' + ]) + new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar') - expected_response = [ - '0008NAK\n', '0008\x02foo', '000asubp1\n', '000asubp2\n', '000asubp3\n', - '000asubp4\n', '0008\x02bar', '0000' - ] + expected_response = b''.join([ + b'0008NAK\n', + b'0008\x02foo', + b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n', + b'0008\x02bar', + b'0000' + ]) - assert new_response == expected_response + assert b''.join(new_response) == expected_response def test_build_failed_pre_pull_response_no_sideband(pygrack_instance): @@ -231,19 +237,52 @@ def test_build_failed_pre_pull_response_ @pytest.mark.parametrize('capabilities', [ - ['side-band'], - ['side-band-64k'], - ['side-band-64k', 'no-progress'], + [pygrack.CAPABILITY_SIDE_BAND], + [pygrack.CAPABILITY_SIDE_BAND_64K], + [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], ]) def test_build_failed_pre_pull_response(pygrack_instance, capabilities): - response = pygrack_instance._build_failed_pre_pull_response( - capabilities, 'foo') + response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo') expected_response = [ - '0008NAK\n', '0008\x02foo', '0024\x02Pre pull hook failed: aborting\n', - '%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, - pygrack.GitRepository.EMPTY_PACK), - '0000', + b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n', + b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK), + pygrack.GitRepository.FLUSH_PACKET, ] assert response == expected_response + + +def test_inject_messages_to_response_generator(pygrack_instance): + + def response_generator(): + response = [ + # protocol start + b'0008NAK\n', + ] + response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)] + response += [ + # protocol end + pygrack.GitRepository.FLUSH_PACKET + ] + for elem in response: + yield elem + + new_response = pygrack_instance._build_post_pull_response( + response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n') + + assert iter(new_response) + + expected_response = b''.join([ + # start + b'0008NAK\n0012\x02PRE_PULL_MSG\n', + ] + [ + # ... rest + ascii_bytes(f'000asubp{x}\n') for x in range(1000) + ] + [ + # final message, + b'0013\x02POST_PULL_MSG\n0000', + + ]) + + assert b''.join(new_response) == expected_response diff --git a/vcsserver/tests/test_scm_app.py b/vcsserver/tests/test_scm_app.py --- a/vcsserver/tests/test_scm_app.py +++ b/vcsserver/tests/test_scm_app.py @@ -25,10 +25,11 @@ import pytest import webtest from vcsserver import scm_app +from vcsserver.utils import ascii_bytes def test_hg_does_not_accept_invalid_cmd(tmpdir): - repo = mercurial.hg.repository(mercurial.ui.ui(), str(tmpdir), create=True) + repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True) app = webtest.TestApp(scm_app.HgWeb(repo)) response = app.get('/repo?cmd=invalidcmd', expect_errors=True) @@ -37,7 +38,7 @@ def test_hg_does_not_accept_invalid_cmd( def test_create_hg_wsgi_app_requirement_error(tmpdir): - repo = mercurial.hg.repository(mercurial.ui.ui(), str(tmpdir), create=True) + repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True) config = ( ('paths', 'default', ''), ) diff --git a/vcsserver/tests/test_subprocessio.py b/vcsserver/tests/test_subprocessio.py --- a/vcsserver/tests/test_subprocessio.py +++ b/vcsserver/tests/test_subprocessio.py @@ -22,12 +22,13 @@ import sys import pytest from vcsserver import subprocessio +from vcsserver.utils import ascii_bytes -class KindaFilelike(object): # pragma: no cover +class FileLikeObj(object): # pragma: no cover - def __init__(self, data, size): - chunks = size / len(data) + def __init__(self, data: bytes, size): + chunks = size // len(data) self.stream = self._get_stream(data, chunks) @@ -37,7 +38,7 @@ class KindaFilelike(object): # pragma: def read(self, n): - buffer_stream = '' + buffer_stream = b'' for chunk in self.stream: buffer_stream += chunk if len(buffer_stream) >= n: @@ -63,93 +64,92 @@ def _get_python_args(script): def test_raise_exception_on_non_zero_return_code(environ): - args = _get_python_args('sys.exit(1)') - with pytest.raises(EnvironmentError): - list(subprocessio.SubprocessIOChunker(args, shell=False, env=environ)) + call_args = _get_python_args('raise ValueError("fail")') + with pytest.raises(OSError): + b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ)) def test_does_not_fail_on_non_zero_return_code(environ): - args = _get_python_args('sys.exit(1)') - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, fail_on_return_code=False, env=environ - ) - ) + call_args = _get_python_args('sys.stdout.write("hello"); sys.exit(1)') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ) + output = b''.join(proc) - assert output == '' + assert output == b'hello' def test_raise_exception_on_stderr(environ): - args = _get_python_args('sys.stderr.write("X"); time.sleep(1);') - with pytest.raises(EnvironmentError) as excinfo: - list(subprocessio.SubprocessIOChunker(args, shell=False, env=environ)) + call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); time.sleep(1);') - assert 'exited due to an error:\nX' in str(excinfo.value) + with pytest.raises(OSError) as excinfo: + b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ)) + + assert 'exited due to an error:\nWRITE_TO_STDERR' in str(excinfo.value) def test_does_not_fail_on_stderr(environ): - args = _get_python_args('sys.stderr.write("X"); time.sleep(1);') - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, fail_on_stderr=False, env=environ - ) - ) + call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); sys.stderr.flush; time.sleep(2);') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_stderr=False, env=environ) + output = b''.join(proc) - assert output == '' + assert output == b'' -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_no_input(size, environ): - print((type(environ))) - data = 'X' - args = _get_python_args('sys.stdout.write("%s" * %d)' % (data, size)) - output = ''.join(subprocessio.SubprocessIOChunker(args, shell=False, env=environ)) + call_args = _get_python_args(f'sys.stdout.write("X" * {size});') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ) + output = b''.join(proc) - assert output == data * size + assert output == ascii_bytes("X" * size) -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_no_input_does_not_fail(size, environ): - data = 'X' - args = _get_python_args('sys.stdout.write("%s" * %d); sys.exit(1)' % (data, size)) - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, fail_on_return_code=False, env=environ - ) - ) - print(("{} {}".format(len(data * size), len(output)))) - assert output == data * size + call_args = _get_python_args(f'sys.stdout.write("X" * {size}); sys.exit(1)') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ) + output = b''.join(proc) + + assert output == ascii_bytes("X" * size) -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_input(size, environ): data_len = size - inputstream = KindaFilelike('X', size) + inputstream = FileLikeObj(b'X', size) # This acts like the cat command. - args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, inputstream=inputstream, env=environ - ) + call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') + # note: in this tests we explicitly don't assign chunker to a variable and let it stream directly + output = b''.join( + subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ) ) assert len(output) == data_len -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_input_skipping_iterator(size, environ): data_len = size - inputstream = KindaFilelike('X', size) + inputstream = FileLikeObj(b'X', size) # This acts like the cat command. - args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') + call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') # Note: assigning the chunker makes sure that it is not deleted too early - chunker = subprocessio.SubprocessIOChunker( - args, shell=False, inputstream=inputstream, env=environ - ) - output = ''.join(chunker.output) + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ) + output = b''.join(proc.stdout) assert len(output) == data_len diff --git a/vcsserver/tests/test_svn.py b/vcsserver/tests/test_svn.py --- a/vcsserver/tests/test_svn.py +++ b/vcsserver/tests/test_svn.py @@ -20,10 +20,12 @@ import mock import pytest import sys +from vcsserver.utils import ascii_bytes + class MockPopen(object): def __init__(self, stderr): - self.stdout = io.BytesIO('') + self.stdout = io.BytesIO(b'') self.stderr = io.BytesIO(stderr) self.returncode = 1 @@ -52,14 +54,13 @@ def test_import_remote_repository_certif remote.is_path_valid_repository = lambda wire, path: True with mock.patch('subprocess.Popen', - return_value=MockPopen(stderr)): + return_value=MockPopen(ascii_bytes(stderr))): with pytest.raises(Exception) as excinfo: remote.import_remote_repository({'path': 'path'}, 'url') - expected_error_args = ( - 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason),) + expected_error_args = 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason) - assert excinfo.value.args == expected_error_args + assert excinfo.value.args[0] == expected_error_args def test_svn_libraries_can_be_imported(): @@ -84,3 +85,19 @@ def test_username_password_extraction_fr remote.is_path_valid_repository = lambda wire, path: True assert remote.get_url_and_credentials(example_url) == parts + + +@pytest.mark.parametrize('call_url', [ + b'https://svn.code.sf.net/p/svnbook/source/trunk/', + b'https://marcink@svn.code.sf.net/p/svnbook/source/trunk/', + b'https://marcink:qweqwe@svn.code.sf.net/p/svnbook/source/trunk/', +]) +def test_check_url(call_url): + from vcsserver.remote import svn + factory = mock.Mock() + factory.repo = mock.Mock(return_value=mock.Mock()) + + remote = svn.SvnRemote(factory) + remote.is_path_valid_repository = lambda wire, path: True + assert remote.check_url(call_url) + diff --git a/vcsserver/tests/test_utils.py b/vcsserver/tests/test_utils.py new file mode 100644 --- /dev/null +++ b/vcsserver/tests/test_utils.py @@ -0,0 +1,53 @@ +# RhodeCode VCSServer provides access to different vcs backends via network. +# Copyright (C) 2014-2020 RhodeCode GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +import pytest +from vcsserver.utils import ascii_bytes, ascii_str + + +@pytest.mark.parametrize('given, expected', [ + ('a', b'a'), + (u'a', b'a'), +]) +def test_ascii_bytes(given, expected): + assert ascii_bytes(given) == expected + + +@pytest.mark.parametrize('given', [ + 'å', + 'å'.encode('utf8') +]) +def test_ascii_bytes_raises(given): + with pytest.raises(ValueError): + ascii_bytes(given) + + +@pytest.mark.parametrize('given, expected', [ + (b'a', 'a'), +]) +def test_ascii_str(given, expected): + assert ascii_str(given) == expected + + +@pytest.mark.parametrize('given', [ + u'a', + 'å'.encode('utf8'), + u'å' +]) +def test_ascii_str_raises(given): + with pytest.raises(ValueError): + ascii_str(given) diff --git a/vcsserver/tests/test_wsgi_app_caller.py b/vcsserver/tests/test_wsgi_app_caller.py --- a/vcsserver/tests/test_wsgi_app_caller.py +++ b/vcsserver/tests/test_wsgi_app_caller.py @@ -19,25 +19,26 @@ import wsgiref.simple_server import wsgiref.validate from vcsserver import wsgi_app_caller - - -# pylint: disable=protected-access,too-many-public-methods +from vcsserver.utils import ascii_bytes, safe_str @wsgiref.validate.validator def demo_app(environ, start_response): """WSGI app used for testing.""" + + input_data = safe_str(environ['wsgi.input'].read(1024)) + data = [ - 'Hello World!\n', - 'input_data=%s\n' % environ['wsgi.input'].read(), + f'Hello World!\n', + f'input_data={input_data}\n', ] for key, value in sorted(environ.items()): - data.append('%s=%s\n' % (key, value)) + data.append(f'{key}={value}\n') write = start_response("200 OK", [('Content-Type', 'text/plain')]) - write('Old school write method\n') - write('***********************\n') - return data + write(b'Old school write method\n') + write(b'***********************\n') + return list(map(ascii_bytes, data)) BASE_ENVIRON = { @@ -53,11 +54,11 @@ BASE_ENVIRON = { def test_complete_environ(): environ = dict(BASE_ENVIRON) - data = "data" + data = b"data" wsgi_app_caller._complete_environ(environ, data) wsgiref.validate.check_environ(environ) - assert data == environ['wsgi.input'].read() + assert data == environ['wsgi.input'].read(1024) def test_start_response(): @@ -81,16 +82,17 @@ def test_start_response_with_error(): def test_wsgi_app_caller(): - caller = wsgi_app_caller.WSGIAppCaller(demo_app) environ = dict(BASE_ENVIRON) input_data = 'some text' + + caller = wsgi_app_caller.WSGIAppCaller(demo_app) responses, status, headers = caller.handle(environ, input_data) - response = ''.join(responses) + response = b''.join(responses) assert status == '200 OK' assert headers == [('Content-Type', 'text/plain')] - assert response.startswith( - 'Old school write method\n***********************\n') - assert 'Hello World!\n' in response - assert 'foo.var=bla\n' in response - assert 'input_data=%s\n' % input_data in response + assert response.startswith(b'Old school write method\n***********************\n') + assert b'Hello World!\n' in response + assert b'foo.var=bla\n' in response + + assert ascii_bytes(f'input_data={input_data}\n') in response diff --git a/vcsserver/tweens/request_wrapper.py b/vcsserver/tweens/request_wrapper.py --- a/vcsserver/tweens/request_wrapper.py +++ b/vcsserver/tweens/request_wrapper.py @@ -19,8 +19,7 @@ import time import logging import vcsserver -from vcsserver.utils import safe_str - +from vcsserver.utils import safe_str, ascii_str log = logging.getLogger(__name__) @@ -62,7 +61,7 @@ class RequestWrapperTween(object): response = self.handler(request) finally: count = request.request_count() - _ver_ = vcsserver.__version__ + _ver_ = ascii_str(vcsserver.__version__) _path = safe_str(get_access_path(request.environ)) ip = '127.0.0.1' match_route = request.matched_route.name if request.matched_route else "NOT_FOUND" @@ -70,7 +69,7 @@ class RequestWrapperTween(object): total = time.time() - start - _view_path = "{}/{}@{}".format(_path, vcs_method, repo_name) + _view_path = f"{repo_name}@{_path}/{vcs_method}" log.info( 'Req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s', count, ip, request.environ.get('REQUEST_METHOD'), diff --git a/vcsserver/utils.py b/vcsserver/utils.py --- a/vcsserver/utils.py +++ b/vcsserver/utils.py @@ -91,7 +91,39 @@ def safe_bytes(str_, from_encoding=None) except UnicodeDecodeError: pass - return unicode(str_, from_encoding[0], 'replace') + return str_.encode(from_encoding[0], 'replace') + + +def ascii_bytes(str_, allow_bytes=False) -> bytes: + """ + Simple conversion from str to bytes, with assumption that str_ is pure ASCII. + Fails with UnicodeError on invalid input. + This should be used where encoding and "safe" ambiguity should be avoided. + Where strings already have been encoded in other ways but still are unicode + string - for example to hex, base64, json, urlencoding, or are known to be + identifiers. + """ + if allow_bytes and isinstance(str_, bytes): + return str_ + + if not isinstance(str_, str): + raise ValueError('ascii_bytes cannot convert other types than str: got: {}'.format(type(str_))) + return str_.encode('ascii') + + +def ascii_str(str_): + """ + Simple conversion from bytes to str, with assumption that str_ is pure ASCII. + Fails with UnicodeError on invalid input. + This should be used where encoding and "safe" ambiguity should be avoided. + Where strings are encoded but also in other ways are known to be ASCII, and + where a unicode string is wanted without caring about encoding. For example + to hex, base64, urlencoding, or are known to be identifiers. + """ + + if not isinstance(str_, bytes): + raise ValueError('ascii_str cannot convert other types than bytes: got: {}'.format(type(str_))) + return str_.decode('ascii') class AttributeDict(dict): @@ -103,5 +135,3 @@ class AttributeDict(dict): def sha1(val): return hashlib.sha1(val).hexdigest() - - diff --git a/vcsserver/vcs_base.py b/vcsserver/vcs_base.py --- a/vcsserver/vcs_base.py +++ b/vcsserver/vcs_base.py @@ -17,6 +17,7 @@ from vcsserver.lib import rc_cache + class RemoteBase(object): EMPTY_COMMIT = '0' * 40 diff --git a/vcsserver/wsgi_app_caller.py b/vcsserver/wsgi_app_caller.py --- a/vcsserver/wsgi_app_caller.py +++ b/vcsserver/wsgi_app_caller.py @@ -23,19 +23,20 @@ import io import logging import os +from vcsserver.utils import ascii_bytes log = logging.getLogger(__name__) DEV_NULL = open(os.devnull) -def _complete_environ(environ, input_data): +def _complete_environ(environ, input_data: bytes): """Update the missing wsgi.* variables of a WSGI environment. :param environ: WSGI environment to update :type environ: dict :param input_data: data to be read by the app - :type input_data: str + :type input_data: bytes """ environ.update({ 'wsgi.version': (1, 0), @@ -92,20 +93,19 @@ class WSGIAppCaller(object): :param environ: WSGI environment to update :type environ: dict :param input_data: data to be read by the app - :type input_data: str + :type input_data: str/bytes :returns: a tuple with the contents, status and headers :rtype: (list, str, list<(str, str)>) """ - _complete_environ(environ, input_data) + _complete_environ(environ, ascii_bytes(input_data, allow_bytes=True)) start_response = _StartResponse() log.debug("Calling wrapped WSGI application") responses = self.app(environ, start_response) responses_list = list(responses) existing_responses = start_response.content if existing_responses: - log.debug( - "Adding returned response to response written via write()") + log.debug("Adding returned response to response written via write()") existing_responses.extend(responses_list) responses_list = existing_responses if hasattr(responses, 'close'):