vcsserver: modernize code for python3
super-admin
r1130:d68a72e0 python3
@@ -1,168 +1,168 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import os
20 20 import textwrap
21 21 import string
22 22 import functools
23 23 import logging
24 24 import tempfile
25 25 import logging.config
26 26
27 27 from vcsserver.type_utils import str2bool, aslist
28 28
29 29 log = logging.getLogger(__name__)
30 30
31 31 # skip keys that are set here, so we don't double-process them
32 32 set_keys = {
33 33 '__file__': ''
34 34 }
35 35
36 36
37 37 class SettingsMaker(object):
38 38
39 39 def __init__(self, app_settings):
40 40 self.settings = app_settings
41 41
42 42 @classmethod
43 43 def _bool_func(cls, input_val):
44 44 if isinstance(input_val, bytes):
45 45 # decode to str
46 46 input_val = input_val.decode('utf8')
47 47 return str2bool(input_val)
48 48
49 49 @classmethod
50 50 def _int_func(cls, input_val):
51 51 return int(input_val)
52 52
53 53 @classmethod
54 54 def _list_func(cls, input_val, sep=','):
55 55 return aslist(input_val, sep=sep)
56 56
57 57 @classmethod
58 58 def _string_func(cls, input_val, lower=True):
59 59 if lower:
60 60 input_val = input_val.lower()
61 61 return input_val
62 62
63 63 @classmethod
64 64 def _float_func(cls, input_val):
65 65 return float(input_val)
66 66
67 67 @classmethod
68 68 def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
69 69
70 70 # ensure we have our dir created
71 71 if not os.path.isdir(input_val) and ensure_dir:
72 72 os.makedirs(input_val, mode=mode, exist_ok=True)
73 73
74 74 if not os.path.isdir(input_val):
75 raise Exception('Dir at {} does not exist'.format(input_val))
75 raise Exception(f'Dir at {input_val} does not exist')
76 76 return input_val
77 77
78 78 @classmethod
79 79 def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
80 80 dirname = os.path.dirname(input_val)
81 81 cls._dir_func(dirname, ensure_dir=ensure_dir)
82 82 return input_val
83 83
84 84 @classmethod
85 85 def _key_transformator(cls, key):
86 86 return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
87 87
88 88 def maybe_env_key(self, key):
89 89 # the KEY may also be set in the environment; if so, that value takes higher priority.
90 90 transformed_key = self._key_transformator(key)
91 91 envvar_value = os.environ.get(transformed_key)
92 92 if envvar_value:
93 93 log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
94 94
95 95 return envvar_value
96 96
97 97 def env_expand(self):
98 98 replaced = {}
99 99 for k, v in self.settings.items():
100 100 if k not in set_keys:
101 101 envvar_value = self.maybe_env_key(k)
102 102 if envvar_value:
103 103 replaced[k] = envvar_value
104 104 set_keys[k] = envvar_value
105 105
106 106 # replace ALL keys updated
107 107 self.settings.update(replaced)
108 108
109 109 def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
110 110 """
111 111 Helper to enable debug logging on a running instance
112 112 :return:
113 113 """
114 114
115 115 if not str2bool(self.settings.get('logging.autoconfigure')):
116 116 log.info('logging configuration based on main .ini file')
117 117 return
118 118
119 119 if logging_conf is None:
120 120 logging_conf = self.settings.get('logging.logging_conf_file') or ''
121 121
122 122 if not os.path.isfile(logging_conf):
123 123 log.error('Unable to set up logging based on %s: '
124 124 'file does not exist. Specify a path using the logging.logging_conf_file config setting.', logging_conf)
125 125 return
126 126
127 127 with open(logging_conf, 'rt') as f:
128 128 ini_template = textwrap.dedent(f.read())
129 129 ini_template = string.Template(ini_template).safe_substitute(
130 130 RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
131 131 RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
132 132 )
133 133
134 134 with tempfile.NamedTemporaryFile(mode='w', prefix='rc_logging_', suffix='.ini', delete=False) as f:
135 135 log.info('Saved Temporary LOGGING config at %s', f.name)
136 136 f.write(ini_template)
137 137
138 138 logging.config.fileConfig(f.name)
139 139 os.remove(f.name)
140 140
141 141 def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
142 142 input_val = self.settings.get(key, default)
143 143
144 144 if default_when_empty and not input_val:
145 145 # use default value when value is set in the config but it is empty
146 146 input_val = default
147 147
148 148 parser_func = {
149 149 'bool': self._bool_func,
150 150 'int': self._int_func,
151 151 'list': self._list_func,
152 152 'list:newline': functools.partial(self._list_func, sep='\n'),
153 153 'list:spacesep': functools.partial(self._list_func, sep=' '),
154 154 'string': functools.partial(self._string_func, lower=lower),
155 155 'dir': self._dir_func,
156 156 'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
157 157 'file': self._file_path_func,
158 158 'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
159 159 None: lambda i: i
160 160 }[parser]
161 161
162 162 envvar_value = self.maybe_env_key(key)
163 163 if envvar_value:
164 164 input_val = envvar_value
165 165 set_keys[key] = input_val
166 166
167 167 self.settings[key] = parser_func(input_val)
168 168 return self.settings[key]
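
A minimal usage sketch for the SettingsMaker class above, assuming the module is importable and that str2bool parses the usual truthy strings; the keys and values are illustrative, not taken from a real .ini file:

import os

settings = {'startup.import_repos': 'true', 'vcs.svn.compatible_version': ''}
maker = SettingsMaker(settings)

# an RC_-prefixed environment variable wins over the .ini value;
# per _key_transformator, dots and dashes become underscores
os.environ['RC_STARTUP_IMPORT_REPOS'] = 'false'
maker.make_setting('startup.import_repos', default=False, parser='bool')

# an empty config value falls back to the default
maker.make_setting('vcs.svn.compatible_version', default='1.14',
                   default_when_empty=True, parser='string')

assert settings['startup.import_repos'] is False   # env var took priority
assert settings['vcs.svn.compatible_version'] == '1.14'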
@@ -1,292 +1,292 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import re
19 19 import logging
20 20 from wsgiref.util import FileWrapper
21 21
22 22 from pyramid.config import Configurator
23 23 from pyramid.response import Response, FileIter
24 24 from pyramid.httpexceptions import (
25 25 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
26 26 HTTPUnprocessableEntity)
27 27
28 28 from vcsserver.lib.rc_json import json
29 29 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
30 30 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
31 31 from vcsserver.str_utils import safe_int
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' #+json ?
37 37 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
38 38
39 39
40 40 def write_response_error(http_exception, text=None):
41 41 content_type = GIT_LFS_CONTENT_TYPE + '+json'
42 42 _exception = http_exception(content_type=content_type)
43 43 _exception.content_type = content_type
44 44 if text:
45 45 _exception.body = json.dumps({'message': text})
46 46 log.debug('LFS: writing response of type %s to client with text:%s',
47 47 http_exception, text)
48 48 return _exception
49 49
50 50
51 51 class AuthHeaderRequired(object):
52 52 """
53 53 Decorator to check if request has proper auth-header
54 54 """
55 55
56 56 def __call__(self, func):
57 57 return get_cython_compat_decorator(self.__wrapper, func)
58 58
59 59 def __wrapper(self, func, *fargs, **fkwargs):
60 60 request = fargs[1]
61 61 auth = request.authorization
62 62 if not auth:
63 63 return write_response_error(HTTPForbidden)
64 64 return func(*fargs[1:], **fkwargs)
65 65
66 66
67 67 # views
68 68
69 69 def lfs_objects(request):
70 70 # indicate not supported, V1 API
71 71 log.warning('LFS: v1 api not supported, reporting it back to client')
72 72 return write_response_error(HTTPNotImplemented, 'LFS: v1 api not supported')
73 73
74 74
75 75 @AuthHeaderRequired()
76 76 def lfs_objects_batch(request):
77 77 """
78 78 The client sends the following information to the Batch endpoint to transfer some objects:
79 79
80 80 operation - Should be download or upload.
81 81 transfers - An optional Array of String identifiers for transfer
82 82 adapters that the client has configured. If omitted, the basic
83 83 transfer adapter MUST be assumed by the server.
84 84 objects - An Array of objects to download.
85 85 oid - String OID of the LFS object.
86 86 size - Integer byte size of the LFS object. Must be at least zero.
87 87 """
88 88 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
89 89 auth = request.authorization
90 90 repo = request.matchdict.get('repo')
91 91 data = request.json
92 92 operation = data.get('operation')
93 93 http_scheme = request.registry.git_lfs_http_scheme
94 94
95 95 if operation not in ('download', 'upload'):
96 96 log.debug('LFS: unsupported operation:%s', operation)
97 97 return write_response_error(
98 98 HTTPBadRequest, 'unsupported operation mode: `%s`' % operation)
99 99
100 100 if 'objects' not in data:
101 101 log.debug('LFS: missing objects data')
102 102 return write_response_error(
103 103 HTTPBadRequest, 'missing objects data')
104 104
105 105 log.debug('LFS: handling operation of type: %s', operation)
106 106
107 107 objects = []
108 108 for o in data['objects']:
109 109 try:
110 110 oid = o['oid']
111 111 obj_size = o['size']
112 112 except KeyError:
113 113 log.exception('LFS, failed to extract data')
114 114 return write_response_error(
115 115 HTTPBadRequest, 'unsupported data in objects')
116 116
117 117 obj_data = {'oid': oid}
118 118
119 119 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
120 120 _scheme=http_scheme)
121 121 obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
122 122 _scheme=http_scheme)
123 123 store = LFSOidStore(
124 124 oid, repo, store_location=request.registry.git_lfs_store_path)
125 125 handler = OidHandler(
126 126 store, repo, auth, oid, obj_size, obj_data,
127 127 obj_href, obj_verify_href)
128 128
129 129 # this also verifies the OIDs
130 130 actions, errors = handler.exec_operation(operation)
131 131 if errors:
132 132 log.warning('LFS: got following errors: %s', errors)
133 133 obj_data['errors'] = errors
134 134
135 135 if actions:
136 136 obj_data['actions'] = actions
137 137
138 138 obj_data['size'] = obj_size
139 139 obj_data['authenticated'] = True
140 140 objects.append(obj_data)
141 141
142 142 result = {'objects': objects, 'transfer': 'basic'}
143 143 log.debug('LFS Response %s', safe_result(result))
144 144
145 145 return result
146 146
147 147
148 148 def lfs_objects_oid_upload(request):
149 149 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
150 150 repo = request.matchdict.get('repo')
151 151 oid = request.matchdict.get('oid')
152 152 store = LFSOidStore(
153 153 oid, repo, store_location=request.registry.git_lfs_store_path)
154 154 engine = store.get_engine(mode='wb')
155 155 log.debug('LFS: starting chunked write of LFS oid: %s to storage', oid)
156 156
157 157 body = request.environ['wsgi.input']
158 158
159 159 with engine as f:
160 160 blksize = 64 * 1024 # 64kb
161 161 while True:
162 162 # read in chunks as stream comes in from Gunicorn
163 163 # this is a specific Gunicorn support function.
164 164 # might work differently on waitress
165 165 chunk = body.read(blksize)
166 166 if not chunk:
167 167 break
168 168 f.write(chunk)
169 169
170 170 return {'upload': 'ok'}
171 171
172 172
173 173 def lfs_objects_oid_download(request):
174 174 repo = request.matchdict.get('repo')
175 175 oid = request.matchdict.get('oid')
176 176
177 177 store = LFSOidStore(
178 178 oid, repo, store_location=request.registry.git_lfs_store_path)
179 179 if not store.has_oid():
180 180 log.debug('LFS: oid %s does not exist in store', oid)
181 181 return write_response_error(
182 182 HTTPNotFound, 'requested file with oid `%s` not found in store' % oid)
183 183
184 184 # TODO(marcink): support range header ?
185 185 # Range: bytes=0-, `bytes=(\d+)\-.*`
186 186
187 187 f = open(store.oid_path, 'rb')
188 188 response = Response(
189 189 content_type='application/octet-stream', app_iter=FileIter(f))
190 190 response.headers.add('X-RC-LFS-Response-Oid', str(oid))
191 191 return response
192 192
193 193
194 194 def lfs_objects_verify(request):
195 195 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
196 196 repo = request.matchdict.get('repo')
197 197
198 198 data = request.json
199 199 oid = data.get('oid')
200 200 size = safe_int(data.get('size'))
201 201
202 202 if not (oid and size):
203 203 return write_response_error(
204 204 HTTPBadRequest, 'missing oid and size in request data')
205 205
206 206 store = LFSOidStore(
207 207 oid, repo, store_location=request.registry.git_lfs_store_path)
208 208 if not store.has_oid():
209 209 log.debug('LFS: oid %s does not exist in store', oid)
210 210 return write_response_error(
211 211 HTTPNotFound, 'oid `%s` does not exist in store' % oid)
212 212
213 213 store_size = store.size_oid()
214 214 if store_size != size:
215 msg = 'requested file size mismatch store size:%s requested:%s' % (
215 msg = 'requested file size mismatch store size:{} requested:{}'.format(
216 216 store_size, size)
217 217 return write_response_error(
218 218 HTTPUnprocessableEntity, msg)
219 219
220 220 return {'message': {'size': 'ok', 'in_store': 'ok'}}
221 221
222 222
223 223 def lfs_objects_lock(request):
224 224 return write_response_error(
225 225 HTTPNotImplemented, 'GIT LFS locking api not supported')
226 226
227 227
228 228 def not_found(request):
229 229 return write_response_error(
230 230 HTTPNotFound, 'request path not found')
231 231
232 232
233 233 def lfs_disabled(request):
234 234 return write_response_error(
235 235 HTTPNotImplemented, 'GIT LFS disabled for this repo')
236 236
237 237
238 238 def git_lfs_app(config):
239 239
240 240 # v1 API deprecation endpoint
241 241 config.add_route('lfs_objects',
242 242 '/{repo:.*?[^/]}/info/lfs/objects')
243 243 config.add_view(lfs_objects, route_name='lfs_objects',
244 244 request_method='POST', renderer='json')
245 245
246 246 # locking API
247 247 config.add_route('lfs_objects_lock',
248 248 '/{repo:.*?[^/]}/info/lfs/locks')
249 249 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock',
250 250 request_method=('POST', 'GET'), renderer='json')
251 251
252 252 config.add_route('lfs_objects_lock_verify',
253 253 '/{repo:.*?[^/]}/info/lfs/locks/verify')
254 254 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock_verify',
255 255 request_method=('POST', 'GET'), renderer='json')
256 256
257 257 # batch API
258 258 config.add_route('lfs_objects_batch',
259 259 '/{repo:.*?[^/]}/info/lfs/objects/batch')
260 260 config.add_view(lfs_objects_batch, route_name='lfs_objects_batch',
261 261 request_method='POST', renderer='json')
262 262
263 263 # oid upload/download API
264 264 config.add_route('lfs_objects_oid',
265 265 '/{repo:.*?[^/]}/info/lfs/objects/{oid}')
266 266 config.add_view(lfs_objects_oid_upload, route_name='lfs_objects_oid',
267 267 request_method='PUT', renderer='json')
268 268 config.add_view(lfs_objects_oid_download, route_name='lfs_objects_oid',
269 269 request_method='GET', renderer='json')
270 270
271 271 # verification API
272 272 config.add_route('lfs_objects_verify',
273 273 '/{repo:.*?[^/]}/info/lfs/verify')
274 274 config.add_view(lfs_objects_verify, route_name='lfs_objects_verify',
275 275 request_method='POST', renderer='json')
276 276
277 277 # not found handler for API
278 278 config.add_notfound_view(not_found, renderer='json')
279 279
280 280
281 281 def create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
282 282 config = Configurator()
283 283 if git_lfs_enabled:
284 284 config.include(git_lfs_app)
285 285 config.registry.git_lfs_store_path = git_lfs_store_path
286 286 config.registry.git_lfs_http_scheme = git_lfs_http_scheme
287 287 else:
288 288 # not found handler for API, reporting disabled LFS support
289 289 config.add_notfound_view(lfs_disabled, renderer='json')
290 290
291 291 app = config.make_wsgi_app()
292 292 return app
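
The create_app() factory above can be exercised standalone; a hedged sketch (host, port and store path are illustrative — in a real deployment the app is mounted by vcsserver itself):

from wsgiref.simple_server import make_server

app = create_app(
    git_lfs_enabled=True,
    git_lfs_store_path='/tmp/lfs-store',
    git_lfs_http_scheme='https')

# batch requests are then POSTed to /<repo>/info/lfs/objects/batch,
# matching the routes registered in git_lfs_app()
make_server('127.0.0.1', 8090, app).serve_forever()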
@@ -1,175 +1,175 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import shutil
20 20 import logging
21 21 from collections import OrderedDict
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 26 class OidHandler(object):
27 27
28 28 def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href,
29 29 obj_verify_href=None):
30 30 self.current_store = store
31 31 self.repo_name = repo_name
32 32 self.auth = auth
33 33 self.oid = oid
34 34 self.obj_size = obj_size
35 35 self.obj_data = obj_data
36 36 self.obj_href = obj_href
37 37 self.obj_verify_href = obj_verify_href
38 38
39 39 def get_store(self, mode=None):
40 40 return self.current_store
41 41
42 42 def get_auth(self):
43 43 """returns auth header for re-use in upload/download"""
44 44 return " ".join(self.auth)
45 45
46 46 def download(self):
47 47
48 48 store = self.get_store()
49 49 response = None
50 50 has_errors = None
51 51
52 52 if not store.has_oid():
53 53 # error reply back to client that something is wrong with dl
54 err_msg = 'object: {} does not exist in store'.format(store.oid)
54 err_msg = f'object: {store.oid} does not exist in store'
55 55 has_errors = OrderedDict(
56 56 error=OrderedDict(
57 57 code=404,
58 58 message=err_msg
59 59 )
60 60 )
61 61
62 62 download_action = OrderedDict(
63 63 href=self.obj_href,
64 64 header=OrderedDict([("Authorization", self.get_auth())])
65 65 )
66 66 if not has_errors:
67 67 response = OrderedDict(download=download_action)
68 68 return response, has_errors
69 69
70 70 def upload(self, skip_existing=True):
71 71 """
72 72 Write upload action for git-lfs server
73 73 """
74 74
75 75 store = self.get_store()
76 76 response = None
77 77 has_errors = None
78 78
79 79 # if we already have the OID stored, reply with an empty response
80 80 if store.has_oid():
81 81 log.debug('LFS: store already has oid %s', store.oid)
82 82
83 83 # validate size
84 84 store_size = store.size_oid()
85 85 size_match = store_size == self.obj_size
86 86 if not size_match:
87 87 log.warning(
88 88 'LFS: size mismatch for oid:%s, in store:%s expected: %s',
89 89 self.oid, store_size, self.obj_size)
90 90 elif skip_existing:
91 91 log.debug('LFS: skipping further action as oid already exists')
92 92 return response, has_errors
93 93
94 94 chunked = ("Transfer-Encoding", "chunked")
95 95 upload_action = OrderedDict(
96 96 href=self.obj_href,
97 97 header=OrderedDict([("Authorization", self.get_auth()), chunked])
98 98 )
99 99 if not has_errors:
100 100 response = OrderedDict(upload=upload_action)
101 101 # if specified in handler, return the verification endpoint
102 102 if self.obj_verify_href:
103 103 verify_action = OrderedDict(
104 104 href=self.obj_verify_href,
105 105 header=OrderedDict([("Authorization", self.get_auth())])
106 106 )
107 107 response['verify'] = verify_action
108 108 return response, has_errors
109 109
110 110 def exec_operation(self, operation, *args, **kwargs):
111 111 handler = getattr(self, operation)
112 112 log.debug('LFS: handling request using %s handler', handler)
113 113 return handler(*args, **kwargs)
114 114
115 115
116 116 class LFSOidStore(object):
117 117
118 118 def __init__(self, oid, repo, store_location=None):
119 119 self.oid = oid
120 120 self.repo = repo
121 121 self.store_path = store_location or self.get_default_store()
122 122 self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp')
123 123 self.oid_path = os.path.join(self.store_path, oid)
124 124 self.fd = None
125 125
126 126 def get_engine(self, mode):
127 127 """
128 128 engine = .get_engine(mode='wb')
129 129 with engine as f:
130 130 f.write('...')
131 131 """
132 132
133 133 class StoreEngine(object):
134 134 def __init__(self, mode, store_path, oid_path, tmp_oid_path):
135 135 self.mode = mode
136 136 self.store_path = store_path
137 137 self.oid_path = oid_path
138 138 self.tmp_oid_path = tmp_oid_path
139 139
140 140 def __enter__(self):
141 141 if not os.path.isdir(self.store_path):
142 142 os.makedirs(self.store_path)
143 143
144 144 # TODO(marcink): maybe write metadata here with size/oid ?
145 145 fd = open(self.tmp_oid_path, self.mode)
146 146 self.fd = fd
147 147 return fd
148 148
149 149 def __exit__(self, exc_type, exc_value, traceback):
150 150 # close tmp file, and rename to final destination
151 151 self.fd.close()
152 152 shutil.move(self.tmp_oid_path, self.oid_path)
153 153
154 154 return StoreEngine(
155 155 mode, self.store_path, self.oid_path, self.tmp_oid_path)
156 156
157 157 def get_default_store(self):
158 158 """
159 159 Default store, consistent with defaults of Mercurial large files store
160 160 which is /home/username/.cache/largefiles
161 161 """
162 162 user_home = os.path.expanduser("~")
163 163 return os.path.join(user_home, '.cache', 'lfs-store')
164 164
165 165 def has_oid(self):
166 166 return os.path.exists(os.path.join(self.store_path, self.oid))
167 167
168 168 def size_oid(self):
169 169 size = -1
170 170
171 171 if self.has_oid():
172 172 oid = os.path.join(self.store_path, self.oid)
173 173 size = os.stat(oid).st_size
174 174
175 175 return size
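
An illustrative round trip through LFSOidStore and its StoreEngine (the oid, repo name and store location are made up):

content = b'large file content'
store = LFSOidStore(oid='deadbeef' * 8, repo='some-repo',
                    store_location='/tmp/lfs-store')

# writes go to '<oid>.tmp' and are renamed into place on __exit__,
# so readers never observe a partially written object
with store.get_engine(mode='wb') as f:
    f.write(content)

assert store.has_oid()
assert store.size_oid() == len(content)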
@@ -1,202 +1,202 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import re
19 19 import os
20 20 import sys
21 21 import datetime
22 22 import logging
23 23 import pkg_resources
24 24
25 25 import vcsserver
26 26 from vcsserver.str_utils import safe_bytes
27 27
28 28 log = logging.getLogger(__name__)
29 29
30 30
31 31 def get_git_hooks_path(repo_path, bare):
32 32 hooks_path = os.path.join(repo_path, 'hooks')
33 33 if not bare:
34 34 hooks_path = os.path.join(repo_path, '.git', 'hooks')
35 35
36 36 return hooks_path
37 37
38 38
39 39 def install_git_hooks(repo_path, bare, executable=None, force_create=False):
40 40 """
41 41 Creates a RhodeCode hook inside a git repository
42 42
43 43 :param repo_path: path to repository
44 44 :param executable: binary executable to put in the hooks
45 45 :param force_create: Create even if same name hook exists
46 46 """
47 47 executable = executable or sys.executable
48 48 hooks_path = get_git_hooks_path(repo_path, bare)
49 49
50 50 if not os.path.isdir(hooks_path):
51 51 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
52 52
53 53 tmpl_post = pkg_resources.resource_string(
54 54 'vcsserver', '/'.join(
55 55 ('hook_utils', 'hook_templates', 'git_post_receive.py.tmpl')))
56 56 tmpl_pre = pkg_resources.resource_string(
57 57 'vcsserver', '/'.join(
58 58 ('hook_utils', 'hook_templates', 'git_pre_receive.py.tmpl')))
59 59
60 60 path = '' # not used for now
61 61 timestamp = datetime.datetime.utcnow().isoformat()
62 62
63 63 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
64 64 log.debug('Installing git hook in repo %s', repo_path)
65 65 _hook_file = os.path.join(hooks_path, '%s-receive' % h_type)
66 66 _rhodecode_hook = check_rhodecode_hook(_hook_file)
67 67
68 68 if _rhodecode_hook or force_create:
69 69 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
70 70 try:
71 71 with open(_hook_file, 'wb') as f:
72 72 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.__version__))
73 73 template = template.replace(b'_DATE_', safe_bytes(timestamp))
74 74 template = template.replace(b'_ENV_', safe_bytes(executable))
75 75 template = template.replace(b'_PATH_', safe_bytes(path))
76 76 f.write(template)
77 77 os.chmod(_hook_file, 0o755)
78 except IOError:
78 except OSError:
79 79 log.exception('error writing hook file %s', _hook_file)
80 80 else:
81 81 log.debug('skipping writing hook file')
82 82
83 83 return True
84 84
85 85
86 86 def get_svn_hooks_path(repo_path):
87 87 hooks_path = os.path.join(repo_path, 'hooks')
88 88
89 89 return hooks_path
90 90
91 91
92 92 def install_svn_hooks(repo_path, executable=None, force_create=False):
93 93 """
94 94 Creates RhodeCode hooks inside a svn repository
95 95
96 96 :param repo_path: path to repository
97 97 :param executable: binary executable to put in the hooks
98 98 :param force_create: Create even if same name hook exists
99 99 """
100 100 executable = executable or sys.executable
101 101 hooks_path = get_svn_hooks_path(repo_path)
102 102 if not os.path.isdir(hooks_path):
103 103 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
104 104
105 105 tmpl_post = pkg_resources.resource_string(
106 106 'vcsserver', '/'.join(
107 107 ('hook_utils', 'hook_templates', 'svn_post_commit_hook.py.tmpl')))
108 108 tmpl_pre = pkg_resources.resource_string(
109 109 'vcsserver', '/'.join(
110 110 ('hook_utils', 'hook_templates', 'svn_pre_commit_hook.py.tmpl')))
111 111
112 112 path = '' # not used for now
113 113 timestamp = datetime.datetime.utcnow().isoformat()
114 114
115 115 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
116 116 log.debug('Installing svn hook in repo %s', repo_path)
117 117 _hook_file = os.path.join(hooks_path, '%s-commit' % h_type)
118 118 _rhodecode_hook = check_rhodecode_hook(_hook_file)
119 119
120 120 if _rhodecode_hook or force_create:
121 121 log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
122 122
123 123 try:
124 124 with open(_hook_file, 'wb') as f:
125 125 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.__version__))
126 126 template = template.replace(b'_DATE_', safe_bytes(timestamp))
127 127 template = template.replace(b'_ENV_', safe_bytes(executable))
128 128 template = template.replace(b'_PATH_', safe_bytes(path))
129 129
130 130 f.write(template)
131 131 os.chmod(_hook_file, 0o755)
132 except IOError:
132 except OSError:
133 133 log.exception('error writing hook file %s', _hook_file)
134 134 else:
135 135 log.debug('skipping writing hook file')
136 136
137 137 return True
138 138
139 139
140 140 def get_version_from_hook(hook_path):
141 141 version = b''
142 142 hook_content = read_hook_content(hook_path)
143 143 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
144 144 if matches:
145 145 try:
146 146 version = matches.groups()[0]
147 147 log.debug('got version %s from hooks.', version)
148 148 except Exception:
149 149 log.exception("Exception while reading the hook version.")
150 150 return version.replace(b"'", b"")
151 151
152 152
153 153 def check_rhodecode_hook(hook_path):
154 154 """
155 155 Check if the hook was created by RhodeCode
156 156 """
157 157 if not os.path.exists(hook_path):
158 158 return True
159 159
160 160 log.debug('hook exists, checking if it is from RhodeCode')
161 161
162 162 version = get_version_from_hook(hook_path)
163 163 if version:
164 164 return True
165 165
166 166 return False
167 167
168 168
169 169 def read_hook_content(hook_path) -> bytes:
170 170 content = b''
171 171 if os.path.isfile(hook_path):
172 172 with open(hook_path, 'rb') as f:
173 173 content = f.read()
174 174 return content
175 175
176 176
177 177 def get_git_pre_hook_version(repo_path, bare):
178 178 hooks_path = get_git_hooks_path(repo_path, bare)
179 179 _hook_file = os.path.join(hooks_path, 'pre-receive')
180 180 version = get_version_from_hook(_hook_file)
181 181 return version
182 182
183 183
184 184 def get_git_post_hook_version(repo_path, bare):
185 185 hooks_path = get_git_hooks_path(repo_path, bare)
186 186 _hook_file = os.path.join(hooks_path, 'post-receive')
187 187 version = get_version_from_hook(_hook_file)
188 188 return version
189 189
190 190
191 191 def get_svn_pre_hook_version(repo_path):
192 192 hooks_path = get_svn_hooks_path(repo_path)
193 193 _hook_file = os.path.join(hooks_path, 'pre-commit')
194 194 version = get_version_from_hook(_hook_file)
195 195 return version
196 196
197 197
198 198 def get_svn_post_hook_version(repo_path):
199 199 hooks_path = get_svn_hooks_path(repo_path)
200 200 _hook_file = os.path.join(hooks_path, 'post-commit')
201 201 version = get_version_from_hook(_hook_file)
202 202 return version
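
A short sketch of installing and inspecting the hooks defined above (the repository path is hypothetical):

repo_path = '/srv/repos/example.git'

# writes pre-receive/post-receive from the bundled templates;
# force_create replaces hooks even when they were not written by RhodeCode
install_git_hooks(repo_path, bare=True, force_create=True)

# both return the RC_HOOK_VER baked into the template, as bytes
print(get_git_pre_hook_version(repo_path, bare=True))
print(get_git_post_hook_version(repo_path, bare=True))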
@@ -1,243 +1,243 b''
1 1 '''
2 2 This library is provided to allow standard python logging
3 3 to output log data as JSON formatted strings
4 4 '''
5 5 import logging
6 6 import json
7 7 import re
8 8 from datetime import date, datetime, time, tzinfo, timedelta
9 9 import traceback
10 10 import importlib
11 11
12 12 from inspect import istraceback
13 13
14 14 from collections import OrderedDict
15 15
16 16
17 17 def _inject_req_id(record, *args, **kwargs):
18 18 return record
19 19
20 20
21 21 ExceptionAwareFormatter = logging.Formatter
22 22
23 23
24 24 ZERO = timedelta(0)
25 25 HOUR = timedelta(hours=1)
26 26
27 27
28 28 class UTC(tzinfo):
29 29 """UTC"""
30 30
31 31 def utcoffset(self, dt):
32 32 return ZERO
33 33
34 34 def tzname(self, dt):
35 35 return "UTC"
36 36
37 37 def dst(self, dt):
38 38 return ZERO
39 39
40 40 utc = UTC()
41 41
42 42
43 43 # skip natural LogRecord attributes
44 44 # http://docs.python.org/library/logging.html#logrecord-attributes
45 45 RESERVED_ATTRS = (
46 46 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
47 47 'funcName', 'levelname', 'levelno', 'lineno', 'module',
48 48 'msecs', 'message', 'msg', 'name', 'pathname', 'process',
49 49 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
50 50
51 51
52 52 def merge_record_extra(record, target, reserved):
53 53 """
54 54 Merges extra attributes from LogRecord object into target dictionary
55 55
56 56 :param record: logging.LogRecord
57 57 :param target: dict to update
58 58 :param reserved: dict or list with reserved keys to skip
59 59 """
60 60 for key, value in record.__dict__.items():
61 61 # this allows to have numeric keys
62 62 if (key not in reserved
63 63 and not (hasattr(key, "startswith")
64 64 and key.startswith('_'))):
65 65 target[key] = value
66 66 return target
67 67
68 68
69 69 class JsonEncoder(json.JSONEncoder):
70 70 """
71 71 A custom encoder extending the default JSONEncoder
72 72 """
73 73
74 74 def default(self, obj):
75 75 if isinstance(obj, (date, datetime, time)):
76 76 return self.format_datetime_obj(obj)
77 77
78 78 elif istraceback(obj):
79 79 return ''.join(traceback.format_tb(obj)).strip()
80 80
81 81 elif type(obj) == Exception \
82 82 or isinstance(obj, Exception) \
83 83 or type(obj) == type:
84 84 return str(obj)
85 85
86 86 try:
87 return super(JsonEncoder, self).default(obj)
87 return super().default(obj)
88 88
89 89 except TypeError:
90 90 try:
91 91 return str(obj)
92 92
93 93 except Exception:
94 94 return None
95 95
96 96 def format_datetime_obj(self, obj):
97 97 return obj.isoformat()
98 98
99 99
100 100 class JsonFormatter(ExceptionAwareFormatter):
101 101 """
102 102 A custom formatter to format logging records as json strings.
103 103 Extra values will be formatted as str() if not supported by
104 104 json default encoder
105 105 """
106 106
107 107 def __init__(self, *args, **kwargs):
108 108 """
109 109 :param json_default: a function for encoding non-standard objects
110 110 as outlined in http://docs.python.org/2/library/json.html
111 111 :param json_encoder: optional custom encoder
112 112 :param json_serializer: a :meth:`json.dumps`-compatible callable
113 113 that will be used to serialize the log record.
114 114 :param json_indent: an optional :meth:`json.dumps`-compatible numeric value
115 115 that will be used to customize the indent of the output json.
116 116 :param prefix: an optional string prefix added at the beginning of
117 117 the formatted string
119 119 :param json_ensure_ascii: ensure_ascii parameter for json.dumps
120 120 :param reserved_attrs: an optional list of fields that will be skipped when
121 121 outputting json log record. Defaults to all log record attributes:
122 122 http://docs.python.org/library/logging.html#logrecord-attributes
123 123 :param timestamp: an optional string/boolean field to add a timestamp when
124 124 outputting the json log record. If string is passed, timestamp will be added
125 125 to log record using string as key. If True boolean is passed, timestamp key
126 126 will be "timestamp". Defaults to False/off.
127 127 """
128 128 self.json_default = self._str_to_fn(kwargs.pop("json_default", None))
129 129 self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None))
130 130 self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps))
131 131 self.json_indent = kwargs.pop("json_indent", None)
132 132 self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True)
133 133 self.prefix = kwargs.pop("prefix", "")
134 134 reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS)
135 135 self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs)))
136 136 self.timestamp = kwargs.pop("timestamp", True)
137 137
138 138 # super(JsonFormatter, self).__init__(*args, **kwargs)
139 139 logging.Formatter.__init__(self, *args, **kwargs)
140 140 if not self.json_encoder and not self.json_default:
141 141 self.json_encoder = JsonEncoder
142 142
143 143 self._required_fields = self.parse()
144 144 self._skip_fields = dict(list(zip(self._required_fields,
145 145 self._required_fields)))
146 146 self._skip_fields.update(self.reserved_attrs)
147 147
148 148 def _str_to_fn(self, fn_as_str):
149 149 """
150 150 If the argument is not a string, return whatever was passed in.
151 151 Parses a string such as package.module.function, imports the module
152 152 and returns the function.
153 153
154 154 :param fn_as_str: The string to parse. If not a string, return it.
155 155 """
156 156 if not isinstance(fn_as_str, str):
157 157 return fn_as_str
158 158
159 159 path, _, function = fn_as_str.rpartition('.')
160 160 module = importlib.import_module(path)
161 161 return getattr(module, function)
162 162
163 163 def parse(self):
164 164 """
165 165 Parses format string looking for substitutions
166 166
167 167 This method is responsible for returning a list of fields (as strings)
168 168 to include in all log messages.
169 169 """
170 170 standard_formatters = re.compile(r'\((.+?)\)', re.IGNORECASE)
171 171 return standard_formatters.findall(self._fmt)
172 172
173 173 def add_fields(self, log_record, record, message_dict):
174 174 """
175 175 Override this method to implement custom logic for adding fields.
176 176 """
177 177 for field in self._required_fields:
178 178 log_record[field] = record.__dict__.get(field)
179 179 log_record.update(message_dict)
180 180 merge_record_extra(record, log_record, reserved=self._skip_fields)
181 181
182 182 if self.timestamp:
183 183 key = self.timestamp if type(self.timestamp) == str else 'timestamp'
184 184 log_record[key] = datetime.fromtimestamp(record.created, tz=utc)
185 185
186 186 def process_log_record(self, log_record):
187 187 """
188 188 Override this method to implement custom logic
189 189 on the possibly ordered dictionary.
190 190 """
191 191 return log_record
192 192
193 193 def jsonify_log_record(self, log_record):
194 194 """Returns a json string of the log record."""
195 195 return self.json_serializer(log_record,
196 196 default=self.json_default,
197 197 cls=self.json_encoder,
198 198 indent=self.json_indent,
199 199 ensure_ascii=self.json_ensure_ascii)
200 200
201 201 def serialize_log_record(self, log_record):
202 202 """Returns the final representation of the log record."""
203 return "%s%s" % (self.prefix, self.jsonify_log_record(log_record))
203 return "{}{}".format(self.prefix, self.jsonify_log_record(log_record))
204 204
205 205 def format(self, record):
206 206 """Formats a log record and serializes to json"""
207 207 message_dict = {}
208 208 # FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
209 209 # are always type of str. We shouldn't need to override that.
210 210 if isinstance(record.msg, dict):
211 211 message_dict = record.msg
212 212 record.message = None
213 213 else:
214 214 record.message = record.getMessage()
215 215 # only format time if needed
216 216 if "asctime" in self._required_fields:
217 217 record.asctime = self.formatTime(record, self.datefmt)
218 218
219 219 # Display formatted exception, but allow overriding it in the
220 220 # user-supplied dict.
221 221 if record.exc_info and not message_dict.get('exc_info'):
222 222 message_dict['exc_info'] = self.formatException(record.exc_info)
223 223 if not message_dict.get('exc_info') and record.exc_text:
224 224 message_dict['exc_info'] = record.exc_text
225 225 # Display formatted record of stack frames
226 226 # default format is a string returned from :func:`traceback.print_stack`
227 227 try:
228 228 if record.stack_info and not message_dict.get('stack_info'):
229 229 message_dict['stack_info'] = self.formatStack(record.stack_info)
230 230 except AttributeError:
231 231 # Python2.7 doesn't have stack_info.
232 232 pass
233 233
234 234 try:
235 235 log_record = OrderedDict()
236 236 except NameError:
237 237 log_record = {}
238 238
239 239 _inject_req_id(record, with_prefix=False)
240 240 self.add_fields(log_record, record, message_dict)
241 241 log_record = self.process_log_record(log_record)
242 242
243 243 return self.serialize_log_record(log_record)
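
A minimal wiring sketch for the JsonFormatter above:

import logging

handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter(
    '%(asctime)s %(levelname)s %(name)s %(message)s'))

logger = logging.getLogger('example')
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# each record comes out as one JSON object, roughly:
# {"asctime": "...", "levelname": "INFO", "name": "example",
#  "message": "hello", "timestamp": "..."}
logger.info('hello')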
@@ -1,384 +1,384 b''
1 1 import sys
2 2 import threading
3 3 import weakref
4 4 from base64 import b64encode
5 5 from logging import getLogger
6 6 from os import urandom
7 7
8 8 from redis import StrictRedis
9 9
10 10 __version__ = '3.7.0'
11 11
12 12 loggers = {
13 13 k: getLogger("vcsserver." + ".".join((__name__, k)))
14 14 for k in [
15 15 "acquire",
16 16 "refresh.thread.start",
17 17 "refresh.thread.stop",
18 18 "refresh.thread.exit",
19 19 "refresh.start",
20 20 "refresh.shutdown",
21 21 "refresh.exit",
22 22 "release",
23 23 ]
24 24 }
25 25
26 26 text_type = str
27 27 binary_type = bytes
28 28
29 29
30 30 # Check if the id matches. If not, return an error code.
31 31 UNLOCK_SCRIPT = b"""
32 32 if redis.call("get", KEYS[1]) ~= ARGV[1] then
33 33 return 1
34 34 else
35 35 redis.call("del", KEYS[2])
36 36 redis.call("lpush", KEYS[2], 1)
37 37 redis.call("pexpire", KEYS[2], ARGV[2])
38 38 redis.call("del", KEYS[1])
39 39 return 0
40 40 end
41 41 """
42 42
43 43 # Covers both cases: the key doesn't exist, or it doesn't equal the lock's id
44 44 EXTEND_SCRIPT = b"""
45 45 if redis.call("get", KEYS[1]) ~= ARGV[1] then
46 46 return 1
47 47 elseif redis.call("ttl", KEYS[1]) < 0 then
48 48 return 2
49 49 else
50 50 redis.call("expire", KEYS[1], ARGV[2])
51 51 return 0
52 52 end
53 53 """
54 54
55 55 RESET_SCRIPT = b"""
56 56 redis.call('del', KEYS[2])
57 57 redis.call('lpush', KEYS[2], 1)
58 58 redis.call('pexpire', KEYS[2], ARGV[2])
59 59 return redis.call('del', KEYS[1])
60 60 """
61 61
62 62 RESET_ALL_SCRIPT = b"""
63 63 local locks = redis.call('keys', 'lock:*')
64 64 local signal
65 65 for _, lock in pairs(locks) do
66 66 signal = 'lock-signal:' .. string.sub(lock, 6)
67 67 redis.call('del', signal)
68 68 redis.call('lpush', signal, 1)
69 69 redis.call('expire', signal, 1)
70 70 redis.call('del', lock)
71 71 end
72 72 return #locks
73 73 """
74 74
75 75
76 76 class AlreadyAcquired(RuntimeError):
77 77 pass
78 78
79 79
80 80 class NotAcquired(RuntimeError):
81 81 pass
82 82
83 83
84 84 class AlreadyStarted(RuntimeError):
85 85 pass
86 86
87 87
88 88 class TimeoutNotUsable(RuntimeError):
89 89 pass
90 90
91 91
92 92 class InvalidTimeout(RuntimeError):
93 93 pass
94 94
95 95
96 96 class TimeoutTooLarge(RuntimeError):
97 97 pass
98 98
99 99
100 100 class NotExpirable(RuntimeError):
101 101 pass
102 102
103 103
104 104 class Lock(object):
105 105 """
106 106 A Lock context manager implemented via redis SETNX/BLPOP.
107 107 """
108 108 unlock_script = None
109 109 extend_script = None
110 110 reset_script = None
111 111 reset_all_script = None
112 112
113 113 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
114 114 """
115 115 :param redis_client:
116 116 An instance of :class:`~StrictRedis`.
117 117 :param name:
118 118 The name (redis key) the lock should have.
119 119 :param expire:
120 120 The lock expiry time in seconds. If left at the default (None)
121 121 the lock will not expire.
122 122 :param id:
123 123 The ID (redis value) the lock should have. A random value is
124 124 generated when left at the default.
125 125
126 126 Note that if you specify this then the lock is marked as "held". Acquires
127 127 won't be possible.
128 128 :param auto_renewal:
129 129 If set to ``True``, Lock will automatically renew the lock so that it
130 130 doesn't expire for as long as the lock is held (acquire() called
131 131 or running in a context manager).
132 132
133 133 Implementation note: Renewal will happen using a daemon thread with
134 134 an interval of ``expire*2/3``. If wishing to use a different renewal
135 135 time, subclass Lock, call ``super().__init__()`` then set
136 136 ``self._lock_renewal_interval`` to your desired interval.
137 137 :param strict:
138 138 If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
139 139 :param signal_expire:
140 140 Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
141 141 """
142 142 if strict and not isinstance(redis_client, StrictRedis):
143 143 raise ValueError("redis_client must be instance of StrictRedis. "
144 144 "Use strict=False if you know what you're doing.")
145 145 if auto_renewal and expire is None:
146 146 raise ValueError("Expire may not be None when auto_renewal is set")
147 147
148 148 self._client = redis_client
149 149
150 150 if expire:
151 151 expire = int(expire)
152 152 if expire < 0:
153 153 raise ValueError("A negative expire is not acceptable.")
154 154 else:
155 155 expire = None
156 156 self._expire = expire
157 157
158 158 self._signal_expire = signal_expire
159 159 if id is None:
160 160 self._id = b64encode(urandom(18)).decode('ascii')
161 161 elif isinstance(id, binary_type):
162 162 try:
163 163 self._id = id.decode('ascii')
164 164 except UnicodeDecodeError:
165 165 self._id = b64encode(id).decode('ascii')
166 166 elif isinstance(id, text_type):
167 167 self._id = id
168 168 else:
169 169 raise TypeError("Incorrect type for `id`. Must be bytes/str, not %s." % type(id))
170 170 self._name = 'lock:' + name
171 171 self._signal = 'lock-signal:' + name
172 172 self._lock_renewal_interval = (float(expire) * 2 / 3
173 173 if auto_renewal
174 174 else None)
175 175 self._lock_renewal_thread = None
176 176
177 177 self.register_scripts(redis_client)
178 178
179 179 @classmethod
180 180 def register_scripts(cls, redis_client):
181 181 global reset_all_script
182 182 if reset_all_script is None:
183 183 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
184 184 cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
185 185 cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
186 186 cls.reset_script = redis_client.register_script(RESET_SCRIPT)
187 187 cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
188 188
189 189 @property
190 190 def _held(self):
191 191 return self.id == self.get_owner_id()
192 192
193 193 def reset(self):
194 194 """
195 195 Forcibly deletes the lock. Use this with care.
196 196 """
197 197 self.reset_script(client=self._client, keys=(self._name, self._signal), args=(self.id, self._signal_expire))
198 198
199 199 @property
200 200 def id(self):
201 201 return self._id
202 202
203 203 def get_owner_id(self):
204 204 owner_id = self._client.get(self._name)
205 205 if isinstance(owner_id, binary_type):
206 206 owner_id = owner_id.decode('ascii', 'replace')
207 207 return owner_id
208 208
209 209 def acquire(self, blocking=True, timeout=None):
210 210 """
211 211 :param blocking:
212 212 Boolean value specifying whether lock should be blocking or not.
213 213 :param timeout:
214 214 An integer value specifying the maximum number of seconds to block.
215 215 """
216 216 logger = loggers["acquire"]
217 217
218 218 logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
219 219
220 220 if self._held:
221 221 owner_id = self.get_owner_id()
222 raise AlreadyAcquired("Already acquired from this Lock instance. Lock id: {}".format(owner_id))
222 raise AlreadyAcquired(f"Already acquired from this Lock instance. Lock id: {owner_id}")
223 223
224 224 if not blocking and timeout is not None:
225 225 raise TimeoutNotUsable("Timeout cannot be used if blocking=False")
226 226
227 227 if timeout:
228 228 timeout = int(timeout)
229 229 if timeout < 0:
230 230 raise InvalidTimeout("Timeout (%d) cannot be less than or equal to 0" % timeout)
231 231
232 232 if self._expire and not self._lock_renewal_interval and timeout > self._expire:
233 233 raise TimeoutTooLarge("Timeout (%d) cannot be greater than expire (%d)" % (timeout, self._expire))
234 234
235 235 busy = True
236 236 blpop_timeout = timeout or self._expire or 0
237 237 timed_out = False
238 238 while busy:
239 239 busy = not self._client.set(self._name, self._id, nx=True, ex=self._expire)
240 240 if busy:
241 241 if timed_out:
242 242 return False
243 243 elif blocking:
244 244 timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
245 245 else:
246 246 logger.warning("Failed to get %r.", self._name)
247 247 return False
248 248
249 249 logger.debug("Got lock for %r.", self._name)
250 250 if self._lock_renewal_interval is not None:
251 251 self._start_lock_renewer()
252 252 return True
253 253
254 254 def extend(self, expire=None):
255 255 """Extends expiration time of the lock.
256 256
257 257 :param expire:
258 258 New expiration time. If ``None`` - `expire` provided during
259 259 lock initialization will be taken.
260 260 """
261 261 if expire:
262 262 expire = int(expire)
263 263 if expire < 0:
264 264 raise ValueError("A negative expire is not acceptable.")
265 265 elif self._expire is not None:
266 266 expire = self._expire
267 267 else:
268 268 raise TypeError(
269 269 "To extend a lock 'expire' must be provided as an "
270 270 "argument to extend() method or at initialization time."
271 271 )
272 272
273 273 error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
274 274 if error == 1:
275 275 raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
276 276 elif error == 2:
277 277 raise NotExpirable("Lock %s has no assigned expiration time" % self._name)
278 278 elif error:
279 279 raise RuntimeError("Unsupported error code %s from EXTEND script" % error)
280 280
281 281 @staticmethod
282 282 def _lock_renewer(lockref, interval, stop):
283 283 """
284 284 Renew the lock key in redis every `interval` seconds for as long
285 285 as `self._lock_renewal_thread.should_exit` is False.
286 286 """
287 287 while not stop.wait(timeout=interval):
288 288 loggers["refresh.thread.start"].debug("Refreshing lock")
289 289 lock = lockref()
290 290 if lock is None:
291 291 loggers["refresh.thread.stop"].debug(
292 292 "The lock no longer exists, stopping lock refreshing"
293 293 )
294 294 break
295 295 lock.extend(expire=lock._expire)
296 296 del lock
297 297 loggers["refresh.thread.exit"].debug("Exit requested, stopping lock refreshing")
298 298
299 299 def _start_lock_renewer(self):
300 300 """
301 301 Starts the lock refresher thread.
302 302 """
303 303 if self._lock_renewal_thread is not None:
304 304 raise AlreadyStarted("Lock refresh thread already started")
305 305
306 306 loggers["refresh.start"].debug(
307 307 "Starting thread to refresh lock every %s seconds",
308 308 self._lock_renewal_interval
309 309 )
310 310 self._lock_renewal_stop = threading.Event()
311 311 self._lock_renewal_thread = threading.Thread(
312 312 group=None,
313 313 target=self._lock_renewer,
314 314 kwargs={'lockref': weakref.ref(self),
315 315 'interval': self._lock_renewal_interval,
316 316 'stop': self._lock_renewal_stop}
317 317 )
318 318 self._lock_renewal_thread.daemon = True
319 319 self._lock_renewal_thread.start()
320 320
321 321 def _stop_lock_renewer(self):
322 322 """
323 323 Stop the lock renewer.
324 324
325 325 This signals the renewal thread and waits for its exit.
326 326 """
327 327 if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
328 328 return
329 329 loggers["refresh.shutdown"].debug("Signalling the lock refresher to stop")
330 330 self._lock_renewal_stop.set()
331 331 self._lock_renewal_thread.join()
332 332 self._lock_renewal_thread = None
333 333 loggers["refresh.exit"].debug("Lock refresher has stopped")
334 334
335 335 def __enter__(self):
336 336 acquired = self.acquire(blocking=True)
337 337 assert acquired, "Lock wasn't acquired, but blocking=True"
338 338 return self
339 339
340 340 def __exit__(self, exc_type=None, exc_value=None, traceback=None):
341 341 self.release()
342 342
343 343 def release(self):
344 344 """Releases the lock, that was acquired with the same object.
345 345
346 346 .. note::
347 347
348 348 If you want to release a lock that you acquired in a different place you have two choices:
349 349
350 350 * Use ``Lock("name", id=id_from_other_place).release()``
351 351 * Use ``Lock("name").reset()``
352 352 """
353 353 if self._lock_renewal_thread is not None:
354 354 self._stop_lock_renewer()
355 355 loggers["release"].debug("Releasing %r.", self._name)
356 356 error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
357 357 if error == 1:
358 358 raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
359 359 elif error:
360 360 raise RuntimeError("Unsupported error code %s from UNLOCK script." % error)
361 361
362 362 def locked(self):
363 363 """
364 364 Return true if the lock is acquired.
365 365
366 366 Checks whether a lock with the same name already exists. This method
367 367 returns true even if the lock has another id.
368 368 """
369 369 return self._client.exists(self._name) == 1
370 370
371 371
372 372 reset_all_script = None
373 373
374 374
375 375 def reset_all(redis_client):
376 376 """
377 377 Forcibly deletes all locks that remain (e.g. after a crash). Use this with care.
378 378
379 379 :param redis_client:
380 380 An instance of :class:`~StrictRedis`.
381 381 """
382 382 Lock.register_scripts(redis_client)
383 383
384 384 reset_all_script(client=redis_client) # noqa
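
A hedged usage sketch for the Lock above (connection details and the lock name are illustrative):

from redis import StrictRedis

client = StrictRedis(host='localhost', port=6379)

# auto_renewal re-extends the key every expire*2/3 seconds from a daemon
# thread for as long as the block below runs, so the lock cannot expire
# mid-operation
with Lock(client, name='repo-sync', expire=30, auto_renewal=True):
    pass  # ... critical section ...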
@@ -1,52 +1,50 b''
1
2
3 1 import logging
4 2
5 3 from .stream import TCPStatsClient, UnixSocketStatsClient # noqa
6 4 from .udp import StatsClient # noqa
7 5
8 6 HOST = 'localhost'
9 7 PORT = 8125
10 8 IPV6 = False
11 9 PREFIX = None
12 10 MAXUDPSIZE = 512
13 11
14 12 log = logging.getLogger('rhodecode.statsd')
15 13
16 14
17 15 def statsd_config(config, prefix='statsd.'):
18 16 _config = {}
19 17 for key in config.keys():
20 18 if key.startswith(prefix):
21 19 _config[key[len(prefix):]] = config[key]
22 20 return _config
23 21
24 22
25 23 def client_from_config(configuration, prefix='statsd.', **kwargs):
26 24 from pyramid.settings import asbool
27 25
28 26 _config = statsd_config(configuration, prefix)
29 27 statsd_enabled = asbool(_config.pop('enabled', False))
30 28 if not statsd_enabled:
31 29 log.debug('statsd client not enabled by the statsd.enabled flag, skipping...')
32 30 return
33 31
34 32 host = _config.pop('statsd_host', HOST)
35 33 port = _config.pop('statsd_port', PORT)
36 34 prefix = _config.pop('statsd_prefix', PREFIX)
37 35 maxudpsize = _config.pop('statsd_maxudpsize', MAXUDPSIZE)
38 36 ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
39 37 log.debug('configured statsd client %s:%s', host, port)
40 38
41 39 try:
42 40 client = StatsClient(
43 41 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
44 42 except Exception:
45 43 log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd')
46 44 client = None
47 45
48 46 return client
49 47
50 48
51 49 def get_statsd_client(request):
52 50 return client_from_config(request.registry.settings)
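
An illustrative settings dict for client_from_config(); in vcsserver these keys would come from the .ini file:

settings = {
    'statsd.enabled': 'true',
    'statsd.statsd_host': '127.0.0.1',
    'statsd.statsd_port': 8125,
    'statsd.statsd_prefix': 'vcsserver',
}

client = client_from_config(settings)
if client:  # None when statsd.enabled is false or client setup failed
    client.incr('example_counter')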
@@ -1,156 +1,154 b''
1
2
3 1 import re
4 2 import random
5 3 from collections import deque
6 4 from datetime import timedelta
7 5 from repoze.lru import lru_cache
8 6
9 7 from .timer import Timer
10 8
11 9 TAG_INVALID_CHARS_RE = re.compile(
12 10 r"[^\w\d_\-:/\.]",
13 11 #re.UNICODE
14 12 )
15 13 TAG_INVALID_CHARS_SUBS = "_"
16 14
17 15 # we save and expose methods called by statsd for discovery
18 16 buckets_dict = {
19 17
20 18 }
21 19
22 20
23 21 @lru_cache(maxsize=500)
24 22 def _normalize_tags_with_cache(tag_list):
25 23 return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
26 24
27 25
28 26 def normalize_tags(tag_list):
29 27 # We have to turn our input tag list into a non-mutable tuple for it to
30 28 # be hashable (and thus usable) by the @lru_cache decorator.
31 29 return _normalize_tags_with_cache(tuple(tag_list))
32 30
33 31
34 32 class StatsClientBase(object):
35 33 """A Base class for various statsd clients."""
36 34
37 35 def close(self):
38 36 """Used to close and clean up any underlying resources."""
39 37 raise NotImplementedError()
40 38
41 39 def _send(self):
42 40 raise NotImplementedError()
43 41
44 42 def pipeline(self):
45 43 raise NotImplementedError()
46 44
47 45 def timer(self, stat, rate=1, tags=None, auto_send=True):
48 46 """
49 47 statsd = StatsdClient.statsd
50 48 with statsd.timer('bucket_name', auto_send=True) as tmr:
51 49 # This block will be timed.
52 50 for i in range(0, 100000):
53 51 i ** 2
54 52 # you can access time here...
55 53 elapsed_ms = tmr.ms
56 54 """
57 55 return Timer(self, stat, rate, tags, auto_send=auto_send)
58 56
59 57 def timing(self, stat, delta, rate=1, tags=None, use_decimals=True):
60 58 """
61 59 Send new timing information.
62 60
63 61 `delta` can be either a number of milliseconds or a timedelta.
64 62 """
65 63 if isinstance(delta, timedelta):
66 64 # Convert timedelta to number of milliseconds.
67 65 delta = delta.total_seconds() * 1000.
68 66 if use_decimals:
69 67 fmt = '%0.6f|ms'
70 68 else:
71 69 fmt = '%s|ms'
72 70 self._send_stat(stat, fmt % delta, rate, tags)
73 71
74 72 def incr(self, stat, count=1, rate=1, tags=None):
75 73 """Increment a stat by `count`."""
76 74 self._send_stat(stat, '%s|c' % count, rate, tags)
77 75
78 76 def decr(self, stat, count=1, rate=1, tags=None):
79 77 """Decrement a stat by `count`."""
80 78 self.incr(stat, -count, rate, tags)
81 79
82 80 def gauge(self, stat, value, rate=1, delta=False, tags=None):
83 81 """Set a gauge value."""
84 82 if value < 0 and not delta:
85 83 if rate < 1:
86 84 if random.random() > rate:
87 85 return
88 86 with self.pipeline() as pipe:
89 87 pipe._send_stat(stat, '0|g', 1)
90 88 pipe._send_stat(stat, '%s|g' % value, 1)
91 89 else:
92 90 prefix = '+' if delta and value >= 0 else ''
93 91 self._send_stat(stat, '%s%s|g' % (prefix, value), rate, tags)
94 92
95 93 def set(self, stat, value, rate=1):
96 94 """Set a set value."""
97 95 self._send_stat(stat, '%s|s' % value, rate)
98 96
99 97 def histogram(self, stat, value, rate=1, tags=None):
100 98 """Set a histogram"""
101 99 self._send_stat(stat, '%s|h' % value, rate, tags)
102 100
103 101 def _send_stat(self, stat, value, rate, tags=None):
104 102 self._after(self._prepare(stat, value, rate, tags))
105 103
106 104 def _prepare(self, stat, value, rate, tags=None):
107 105 global buckets_dict
108 106 buckets_dict[stat] = 1
109 107
110 108 if rate < 1:
111 109 if random.random() > rate:
112 110 return
113 111 value = '%s|@%s' % (value, rate)
114 112
115 113 if self._prefix:
116 114 stat = '%s.%s' % (self._prefix, stat)
117 115
118 116 res = '%s:%s%s' % (
119 117 stat,
120 118 value,
121 119 ("|#" + ",".join(normalize_tags(tags))) if tags else "",
122 120 )
123 121 return res
124 122
125 123 def _after(self, data):
126 124 if data:
127 125 self._send(data)
128 126
129 127
130 128 class PipelineBase(StatsClientBase):
131 129
132 130 def __init__(self, client):
133 131 self._client = client
134 132 self._prefix = client._prefix
135 133 self._stats = deque()
136 134
137 135 def _send(self):
138 136 raise NotImplementedError()
139 137
140 138 def _after(self, data):
141 139 if data is not None:
142 140 self._stats.append(data)
143 141
144 142 def __enter__(self):
145 143 return self
146 144
147 145 def __exit__(self, typ, value, tb):
148 146 self.send()
149 147
150 148 def send(self):
151 149 if not self._stats:
152 150 return
153 151 self._send()
154 152
155 153 def pipeline(self):
156 154 return self.__class__(self)
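To illustrate the line format _prepare emits (values here are made up): for a client with _prefix = 'vcs', preparing stat 'req_time' with value '12.5|ms', rate=0.5 and tags=['repo:x y'] yields, whenever the random sample passes:

    vcs.req_time:12.5|ms|@0.5|#repo:x_y

i.e. the prefix-qualified stat name, the value with its type code, the sampling-rate suffix, and tags normalized by normalize_tags (the space becomes '_').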
@@ -1,75 +1,73 b''
1
2
3 1 import socket
4 2
5 3 from .base import StatsClientBase, PipelineBase
6 4
7 5
8 6 class StreamPipeline(PipelineBase):
9 7 def _send(self):
10 8 self._client._after('\n'.join(self._stats))
11 9 self._stats.clear()
12 10
13 11
14 12 class StreamClientBase(StatsClientBase):
15 13 def connect(self):
16 14 raise NotImplementedError()
17 15
18 16 def close(self):
19 17 if self._sock and hasattr(self._sock, 'close'):
20 18 self._sock.close()
21 19 self._sock = None
22 20
23 21 def reconnect(self):
24 22 self.close()
25 23 self.connect()
26 24
27 25 def pipeline(self):
28 26 return StreamPipeline(self)
29 27
30 28 def _send(self, data):
31 29 """Send data to statsd."""
32 30 if not self._sock:
33 31 self.connect()
34 32 self._do_send(data)
35 33
36 34 def _do_send(self, data):
37 35 self._sock.sendall(data.encode('ascii') + b'\n')
38 36
39 37
40 38 class TCPStatsClient(StreamClientBase):
41 39 """TCP version of StatsClient."""
42 40
43 41 def __init__(self, host='localhost', port=8125, prefix=None,
44 42 timeout=None, ipv6=False):
45 43 """Create a new client."""
46 44 self._host = host
47 45 self._port = port
48 46 self._ipv6 = ipv6
49 47 self._timeout = timeout
50 48 self._prefix = prefix
51 49 self._sock = None
52 50
53 51 def connect(self):
54 52 fam = socket.AF_INET6 if self._ipv6 else socket.AF_INET
55 53 family, _, _, _, addr = socket.getaddrinfo(
56 54 self._host, self._port, fam, socket.SOCK_STREAM)[0]
57 55 self._sock = socket.socket(family, socket.SOCK_STREAM)
58 56 self._sock.settimeout(self._timeout)
59 57 self._sock.connect(addr)
60 58
61 59
62 60 class UnixSocketStatsClient(StreamClientBase):
63 61 """Unix domain socket version of StatsClient."""
64 62
65 63 def __init__(self, socket_path, prefix=None, timeout=None):
66 64 """Create a new client."""
67 65 self._socket_path = socket_path
68 66 self._timeout = timeout
69 67 self._prefix = prefix
70 68 self._sock = None
71 69
72 70 def connect(self):
73 71 self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
74 72 self._sock.settimeout(self._timeout)
75 73 self._sock.connect(self._socket_path)
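A short sketch of the two stream clients; the address and socket path are hypothetical:

    tcp = TCPStatsClient(host='127.0.0.1', port=8125, prefix='vcs')
    tcp.incr('tcp_event')  # connects lazily on the first _send()
    tcp.close()

    uds = UnixSocketStatsClient('/var/run/statsd.sock', prefix='vcs')
    uds.incr('uds_event')
    uds.close()

Both inherit incr/timing/gauge from StatsClientBase; only the transport differs.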
@@ -1,68 +1,66 b''
1
2
3 1 import functools
4 2 from time import perf_counter as time_now
5 3
6 4
7 5 def safe_wraps(wrapper, *args, **kwargs):
8 6 """Safely wraps partial functions."""
9 7 while isinstance(wrapper, functools.partial):
10 8 wrapper = wrapper.func
11 9 return functools.wraps(wrapper, *args, **kwargs)
12 10
13 11
14 12 class Timer(object):
15 13 """A context manager/decorator for statsd.timing()."""
16 14
17 15 def __init__(self, client, stat, rate=1, tags=None, use_decimals=True, auto_send=True):
18 16 self.client = client
19 17 self.stat = stat
20 18 self.rate = rate
21 19 self.tags = tags
22 20 self.ms = None
23 21 self._sent = False
24 22 self._start_time = None
25 23 self.use_decimals = use_decimals
26 24 self.auto_send = auto_send
27 25
28 26 def __call__(self, f):
29 27 """Thread-safe timing function decorator."""
30 28 @safe_wraps(f)
31 29 def _wrapped(*args, **kwargs):
32 30 start_time = time_now()
33 31 try:
34 32 return f(*args, **kwargs)
35 33 finally:
36 34 elapsed_time_ms = 1000.0 * (time_now() - start_time)
37 35 self.client.timing(self.stat, elapsed_time_ms, self.rate, self.tags, self.use_decimals)
38 36 self._sent = True
39 37 return _wrapped
40 38
41 39 def __enter__(self):
42 40 return self.start()
43 41
44 42 def __exit__(self, typ, value, tb):
45 43 self.stop(send=self.auto_send)
46 44
47 45 def start(self):
48 46 self.ms = None
49 47 self._sent = False
50 48 self._start_time = time_now()
51 49 return self
52 50
53 51 def stop(self, send=True):
54 52 if self._start_time is None:
55 53 raise RuntimeError('Timer has not started.')
56 54 dt = time_now() - self._start_time
57 55 self.ms = 1000.0 * dt # Convert to milliseconds.
58 56 if send:
59 57 self.send()
60 58 return self
61 59
62 60 def send(self):
63 61 if self.ms is None:
64 62 raise RuntimeError('No data recorded.')
65 63 if self._sent:
66 64 raise RuntimeError('Already sent data.')
67 65 self._sent = True
68 66 self.client.timing(self.stat, self.ms, self.rate, self.tags, self.use_decimals)
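A sketch of both Timer usage styles, assuming `statsd` is an already configured client and do_work() is a placeholder:

    @statsd.timer('slow_func_time')  # decorator form: times every call
    def slow_func():
        do_work()

    with statsd.timer('block_time') as tmr:  # context-manager form
        do_work()
    elapsed_ms = tmr.ms  # set by stop(); already sent, since auto_send=True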
@@ -1,55 +1,53 b''
1
2
3 1 import socket
4 2
5 3 from .base import StatsClientBase, PipelineBase
6 4
7 5
8 6 class Pipeline(PipelineBase):
9 7
10 8 def __init__(self, client):
11 super(Pipeline, self).__init__(client)
9 super().__init__(client)
12 10 self._maxudpsize = client._maxudpsize
13 11
14 12 def _send(self):
15 13 data = self._stats.popleft()
16 14 while self._stats:
17 15 # Use popleft to preserve the order of the stats.
18 16 stat = self._stats.popleft()
19 17 if len(stat) + len(data) + 1 >= self._maxudpsize:
20 18 self._client._after(data)
21 19 data = stat
22 20 else:
23 21 data += '\n' + stat
24 22 self._client._after(data)
25 23
26 24
27 25 class StatsClient(StatsClientBase):
28 26 """A client for statsd."""
29 27
30 28 def __init__(self, host='localhost', port=8125, prefix=None,
31 29 maxudpsize=512, ipv6=False):
32 30 """Create a new client."""
33 31 fam = socket.AF_INET6 if ipv6 else socket.AF_INET
34 32 family, _, _, _, addr = socket.getaddrinfo(
35 33 host, port, fam, socket.SOCK_DGRAM)[0]
36 34 self._addr = addr
37 35 self._sock = socket.socket(family, socket.SOCK_DGRAM)
38 36 self._prefix = prefix
39 37 self._maxudpsize = maxudpsize
40 38
41 39 def _send(self, data):
42 40 """Send data to statsd."""
43 41 try:
44 42 self._sock.sendto(data.encode('ascii'), self._addr)
45 43 except (socket.error, RuntimeError):
46 44 # No time for love, Dr. Jones!
47 45 pass
48 46
49 47 def close(self):
50 48 if self._sock and hasattr(self._sock, 'close'):
51 49 self._sock.close()
52 50 self._sock = None
53 51
54 52 def pipeline(self):
55 53 return Pipeline(self)
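A minimal batching sketch; host and port are illustrative:

    client = StatsClient(host='127.0.0.1', port=8125, prefix='vcs', maxudpsize=512)
    with client.pipeline() as pipe:
        for _ in range(100):
            pipe.incr('batched_event')
    # on exit, Pipeline._send() joins the queued stats with '\n' into
    # datagrams no larger than maxudpsize bytes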
@@ -1,53 +1,53 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import sys
20 20 import logging
21 21
22 22
23 23 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38))
24 24
25 25 # Sequences
26 26 RESET_SEQ = "\033[0m"
27 27 COLOR_SEQ = "\033[0;%dm"
28 28 BOLD_SEQ = "\033[1m"
29 29
30 30 COLORS = {
31 31 'CRITICAL': MAGENTA,
32 32 'ERROR': RED,
33 33 'WARNING': CYAN,
34 34 'INFO': GREEN,
35 35 'DEBUG': BLUE,
36 36 'SQL': YELLOW
37 37 }
38 38
39 39
40 40 class ColorFormatter(logging.Formatter):
41 41
42 42 def format(self, record):
43 43 """
44 44 Change record's levelname to use with COLORS enum
45 45 """
46 def_record = super(ColorFormatter, self).format(record)
46 def_record = super().format(record)
47 47
48 48 levelname = record.levelname
49 49 start = COLOR_SEQ % (COLORS[levelname])
50 50 end = RESET_SEQ
51 51
52 52 colored_record = ''.join([start, def_record, end])
53 53 return colored_record
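A sketch of wiring the formatter into stdlib logging; the format string is illustrative:

    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(ColorFormatter('%(asctime)s %(levelname)s %(message)s'))
    logger = logging.getLogger('rhodecode')
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.debug('colored debug line')  # wrapped in BLUE/RESET sequences

Note that format() looks the levelname up in COLORS, so a custom level outside that dict would raise a KeyError.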
@@ -1,63 +1,63 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import logging
20 20
21 21 from repoze.lru import LRUCache
22 22
23 23 from vcsserver.str_utils import safe_str
24 24
25 25 log = logging.getLogger(__name__)
26 26
27 27
28 28 class LRUDict(LRUCache):
29 29 """
30 30 Wrapper to provide partial dict access
31 31 """
32 32
33 33 def __setitem__(self, key, value):
34 34 return self.put(key, value)
35 35
36 36 def __getitem__(self, key):
37 37 return self.get(key)
38 38
39 39 def __contains__(self, key):
40 40 return bool(self.get(key))
41 41
42 42 def __delitem__(self, key):
43 43 del self.data[key]
44 44
45 45 def keys(self):
46 46 return list(self.data.keys())
47 47
48 48
49 49 class LRUDictDebug(LRUDict):
50 50 """
51 51 Wrapper to provide some debug options
52 52 """
53 53 def _report_keys(self):
54 elems_cnt = '{}/{}'.format(len(list(self.keys())), self.size)
54 elems_cnt = f'{len(list(self.keys()))}/{self.size}'
55 55 # trick for pformat print it more nicely
56 56 fmt = '\n'
57 57 for cnt, elem in enumerate(self.keys()):
58 fmt += '{} - {}\n'.format(cnt+1, safe_str(elem))
58 fmt += f'{cnt+1} - {safe_str(elem)}\n'
59 59 log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
60 60
61 61 def __getitem__(self, key):
62 62 self._report_keys()
63 63 return self.get(key)
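A usage sketch of the dict-style wrapper; the capacity is illustrative:

    cache = LRUDict(100)    # LRUCache(size=100) under the hood
    cache['key'] = 'value'  # put()
    if 'key' in cache:      # __contains__ via bool(self.get(key))
        data = cache['key']
    del cache['key']        # removes from the underlying .data dict

One caveat worth knowing: because __contains__ returns bool(self.get(key)), falsy cached values such as '' or 0 read as absent.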
@@ -1,247 +1,247 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import functools
19 19 import logging
20 20 import os
21 21 import threading
22 22 import time
23 23
24 24 import decorator
25 25 from dogpile.cache import CacheRegion
26 26
27 27
28 28 from vcsserver.utils import sha1
29 29 from vcsserver.str_utils import safe_bytes
30 30 from vcsserver.type_utils import str2bool
31 31
32 32 from . import region_meta
33 33
34 34 log = logging.getLogger(__name__)
35 35
36 36
37 37 class RhodeCodeCacheRegion(CacheRegion):
38 38
39 39 def __repr__(self):
40 40 return f'{self.__class__}(name={self.name})'
41 41
42 42 def conditional_cache_on_arguments(
43 43 self, namespace=None,
44 44 expiration_time=None,
45 45 should_cache_fn=None,
46 46 to_str=str,
47 47 function_key_generator=None,
48 48 condition=True):
49 49 """
50 50 Custom conditional decorator, that will not touch any dogpile internals if
51 51 condition isn't meet. This works a bit different from should_cache_fn
52 52 And it's faster in cases we don't ever want to compute cached values
53 53 """
54 54 expiration_time_is_callable = callable(expiration_time)
55 55 if not namespace:
56 56 namespace = getattr(self, '_default_namespace', None)
57 57
58 58 if function_key_generator is None:
59 59 function_key_generator = self.function_key_generator
60 60
61 61 def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
62 62
63 63 if not condition:
64 64 log.debug('Calling un-cached method:%s', user_func.__name__)
65 65 start = time.time()
66 66 result = user_func(*arg, **kw)
67 67 total = time.time() - start
68 68 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
69 69 return result
70 70
71 71 key = func_key_generator(*arg, **kw)
72 72
73 73 timeout = expiration_time() if expiration_time_is_callable \
74 74 else expiration_time
75 75
76 76 log.debug('Calling cached method:`%s`', user_func.__name__)
77 77 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
78 78
79 79 def cache_decorator(user_func):
80 80 if to_str is str:
81 81 # backwards compatible
82 82 key_generator = function_key_generator(namespace, user_func)
83 83 else:
84 84 key_generator = function_key_generator(namespace, user_func, to_str=to_str)
85 85
86 86 def refresh(*arg, **kw):
87 87 """
88 88 Like invalidate, but regenerates the value instead
89 89 """
90 90 key = key_generator(*arg, **kw)
91 91 value = user_func(*arg, **kw)
92 92 self.set(key, value)
93 93 return value
94 94
95 95 def invalidate(*arg, **kw):
96 96 key = key_generator(*arg, **kw)
97 97 self.delete(key)
98 98
99 99 def set_(value, *arg, **kw):
100 100 key = key_generator(*arg, **kw)
101 101 self.set(key, value)
102 102
103 103 def get(*arg, **kw):
104 104 key = key_generator(*arg, **kw)
105 105 return self.get(key)
106 106
107 107 user_func.set = set_
108 108 user_func.invalidate = invalidate
109 109 user_func.get = get
110 110 user_func.refresh = refresh
111 111 user_func.key_generator = key_generator
112 112 user_func.original = user_func
113 113
114 114 # Use `decorate` to preserve the signature of :param:`user_func`.
115 115 return decorator.decorate(user_func, functools.partial(
116 116 get_or_create_for_user_func, key_generator))
117 117
118 118 return cache_decorator
119 119
120 120
121 121 def make_region(*arg, **kw):
122 122 return RhodeCodeCacheRegion(*arg, **kw)
123 123
124 124
125 125 def get_default_cache_settings(settings, prefixes=None):
126 126 prefixes = prefixes or []
127 127 cache_settings = {}
128 128 for key in settings.keys():
129 129 for prefix in prefixes:
130 130 if key.startswith(prefix):
131 131 name = key.split(prefix)[1].strip()
132 132 val = settings[key]
133 133 if isinstance(val, str):
134 134 val = val.strip()
135 135 cache_settings[name] = val
136 136 return cache_settings
137 137
138 138
139 139 def compute_key_from_params(*args):
140 140 """
141 141 Helper to compute key from given params to be used in cache manager
142 142 """
143 143 return sha1(safe_bytes("_".join(map(str, args))))
144 144
145 145
146 146 def custom_key_generator(backend, namespace, fn):
147 147 func_name = fn.__name__
148 148
149 149 def generate_key(*args):
150 150 backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
151 151 namespace_pref = namespace or 'default_namespace'
152 152 arg_key = compute_key_from_params(*args)
153 153 final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
154 154
155 155 return final_key
156 156
157 157 return generate_key
158 158
159 159
160 160 def backend_key_generator(backend):
161 161 """
162 162 Special wrapper that also sends over the backend to the key generator
163 163 """
164 164 def wrapper(namespace, fn):
165 165 return custom_key_generator(backend, namespace, fn)
166 166 return wrapper
167 167
168 168
169 169 def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
170 170 from .backends import FileNamespaceBackend
171 171 from . import async_creation_runner
172 172
173 173 region_obj = region_meta.dogpile_cache_regions.get(region_name)
174 174 if not region_obj:
175 175 reg_keys = list(region_meta.dogpile_cache_regions.keys())
176 raise EnvironmentError(f'Region `{region_name}` not in configured: {reg_keys}.')
176 raise OSError(f'Region `{region_name}` not in the configured regions: {reg_keys}.')
177 177
178 178 region_uid_name = f'{region_name}:{region_namespace}'
179 179
180 180 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
181 181 if not region_namespace:
182 182 raise ValueError(f'{FileNamespaceBackend} used requires to specify region_namespace param')
183 183
184 184 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
185 185 if region_exist:
186 186 log.debug('Using already configured region: %s', region_namespace)
187 187 return region_exist
188 188
189 189 expiration_time = region_obj.expiration_time
190 190
191 191 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
192 192 namespace_cache_dir = cache_dir
193 193
194 194 # we default the namespace_cache_dir to our default cache dir.
195 195 # however, if this backend is configured with a filename= param, we prioritize that,
196 196 # so all caches within that particular region, even the namespaced ones, end up in the same path
197 197 if region_obj.actual_backend.filename:
198 198 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
199 199
200 200 if not os.path.isdir(namespace_cache_dir):
201 201 os.makedirs(namespace_cache_dir)
202 202 new_region = make_region(
203 203 name=region_uid_name,
204 204 function_key_generator=backend_key_generator(region_obj.actual_backend)
205 205 )
206 206
207 207 namespace_filename = os.path.join(
208 208 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
209 209 # special type that allows 1db per namespace
210 210 new_region.configure(
211 211 backend='dogpile.cache.rc.file_namespace',
212 212 expiration_time=expiration_time,
213 213 arguments={"filename": namespace_filename}
214 214 )
215 215
216 216 # create and save in region caches
217 217 log.debug('configuring new region: %s', region_uid_name)
218 218 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
219 219
220 220 region_obj._default_namespace = region_namespace
221 221 if use_async_runner:
222 222 region_obj.async_creation_runner = async_creation_runner
223 223 return region_obj
224 224
225 225
226 226 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str):
227 227 from . import CLEAR_DELETE, CLEAR_INVALIDATE
228 228
229 229 if not isinstance(cache_region, RhodeCodeCacheRegion):
230 230 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
231 231 log.debug('clearing cache region: %s with method=%s', cache_region, method)
232 232
233 233 num_affected_keys = None
234 234
235 235 if method == CLEAR_INVALIDATE:
236 236 # NOTE: The CacheRegion.invalidate() method’s default mode of
237 237 # operation is to set a timestamp local to this CacheRegion in this Python process only.
238 238 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
239 239 cache_region.invalidate(hard=True)
240 240
241 241 if method == CLEAR_DELETE:
242 242 cache_keys = cache_region.backend.list_keys(prefix=cache_namespace_uid)
243 243 num_affected_keys = len(cache_keys)
244 244 if num_affected_keys:
245 245 cache_region.delete_multi(cache_keys)
246 246
247 247 return num_affected_keys
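A sketch of the conditional decorator in use; the region name, namespace and helper names are illustrative:

    region = get_or_create_region('repo_object', region_namespace='repo_1')

    @region.conditional_cache_on_arguments(condition=use_cache)
    def _heavy(_namespace, arg):
        return compute(arg)  # hypothetical expensive call

    result = _heavy('repo_1', 42)    # cached only when use_cache is True
    _heavy.invalidate('repo_1', 42)  # helper attached by cache_decorator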
@@ -1,1463 +1,1463 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40 from dulwich.server import update_server_info
41 41
42 42 from vcsserver import exceptions, settings, subprocessio
43 43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 53 PEELED_REF_MARKER = b'^{}'
54 54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
59 59 def reraise_safe_exceptions(func):
60 60 """Converts Dulwich exceptions to something neutral."""
61 61
62 62 @wraps(func)
63 63 def wrapper(*args, **kwargs):
64 64 try:
65 65 return func(*args, **kwargs)
66 66 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
67 67 exc = exceptions.LookupException(org_exc=e)
68 68 raise exc(safe_str(e))
69 69 except (HangupException, UnexpectedCommandError) as e:
70 70 exc = exceptions.VcsException(org_exc=e)
71 71 raise exc(safe_str(e))
72 72 except Exception:
73 73 # NOTE(marcink): because of how dulwich handles some exceptions
74 74 # (KeyError on empty repos), we cannot track this and catch all
75 75 # exceptions; it may be an exception raised by other handlers
76 76 #if not hasattr(e, '_vcs_kind'):
77 77 #log.exception("Unhandled exception in git remote call")
78 78 #raise_from_original(exceptions.UnhandledException)
79 79 raise
80 80 return wrapper
81 81
82 82
83 83 class Repo(DulwichRepo):
84 84 """
85 85 A wrapper for dulwich Repo class.
86 86
87 87 Since dulwich is sometimes keeping .idx file descriptors open, it leads to
88 88 "Too many open files" error. We need to close all opened file descriptors
89 89 once the repo object is destroyed.
90 90 """
91 91 def __del__(self):
92 92 if hasattr(self, 'object_store'):
93 93 self.close()
94 94
95 95
96 96 class Repository(LibGit2Repo):
97 97
98 98 def __enter__(self):
99 99 return self
100 100
101 101 def __exit__(self, exc_type, exc_val, exc_tb):
102 102 self.free()
103 103
104 104
105 105 class GitFactory(RepoFactory):
106 106 repo_type = 'git'
107 107
108 108 def _create_repo(self, wire, create, use_libgit2=False):
109 109 if use_libgit2:
110 110 repo = Repository(safe_bytes(wire['path']))
111 111 else:
112 112 # dulwich mode
113 113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 114 repo = Repo(repo_path)
115 115
116 116 log.debug('repository created: got GIT object: %s', repo)
117 117 return repo
118 118
119 119 def repo(self, wire, create=False, use_libgit2=False):
120 120 """
121 121 Get a repository instance for the given path.
122 122 """
123 123 return self._create_repo(wire, create, use_libgit2)
124 124
125 125 def repo_libgit2(self, wire):
126 126 return self.repo(wire, use_libgit2=True)
127 127
128 128
129 129 def create_signature_from_string(author_str, **kwargs):
130 130 """
131 131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132 132
133 133 :param author_str: String of the format 'Name <email>'
134 134 :return: pygit2.Signature object
135 135 """
136 136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 137 if match is None:
138 138 raise ValueError(f"Invalid format: {author_str}")
139 139
140 140 name, email = match.groups()
141 141 return pygit2.Signature(name, email, **kwargs)
142 142
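# A usage sketch (hypothetical values) of the helper above, e.g. when building
# commit signatures for create_commit():
#   sig = create_signature_from_string(
#       'Jane Doe <jane@example.com>', time=1700000000, offset=0)
# `time` and `offset` are passed through to pygit2.Signature.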
143 143
144 144 def get_obfuscated_url(url_obj):
145 145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 146 url_obj.query = obfuscate_qs(url_obj.query)
147 147 obfuscated_uri = str(url_obj)
148 148 return obfuscated_uri
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
153 153 def __init__(self, factory):
154 154 self._factory = factory
155 155 self._bulk_methods = {
156 156 "date": self.date,
157 157 "author": self.author,
158 158 "branch": self.branch,
159 159 "message": self.message,
160 160 "parents": self.parents,
161 161 "_commit": self.revision,
162 162 }
163 163 self._bulk_file_methods = {
164 164 "size": self.get_node_size,
165 165 "data": self.get_node_data,
166 166 "flags": self.get_node_flags,
167 167 "is_binary": self.get_node_is_binary,
168 168 "md5": self.md5_hash
169 169 }
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 ssl_cert_dir = config.get('vcs_ssl_dir')
181 181 if ssl_cert_dir:
182 182 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
183 183 return params
184 184
185 185 @reraise_safe_exceptions
186 186 def discover_git_version(self):
187 187 stdout, _ = self.run_git_command(
188 188 {}, ['--version'], _bare=True, _safe=True)
189 189 prefix = b'git version'
190 190 if stdout.startswith(prefix):
191 191 stdout = stdout[len(prefix):]
192 192 return safe_str(stdout.strip())
193 193
194 194 @reraise_safe_exceptions
195 195 def is_empty(self, wire):
196 196 repo_init = self._factory.repo_libgit2(wire)
197 197 with repo_init as repo:
198 198
199 199 try:
200 200 has_head = repo.head.name
201 201 if has_head:
202 202 return False
203 203
204 204 # NOTE(marcink): check again using more expensive method
205 205 return repo.is_empty
206 206 except Exception:
207 207 pass
208 208
209 209 return True
210 210
211 211 @reraise_safe_exceptions
212 212 def assert_correct_path(self, wire):
213 213 cache_on, context_uid, repo_id = self._cache_on(wire)
214 214 region = self._region(wire)
215 215
216 216 @region.conditional_cache_on_arguments(condition=cache_on)
217 217 def _assert_correct_path(_context_uid, _repo_id, fast_check):
218 218 if fast_check:
219 219 path = safe_str(wire['path'])
220 220 if pygit2.discover_repository(path):
221 221 return True
222 222 return False
223 223 else:
224 224 try:
225 225 repo_init = self._factory.repo_libgit2(wire)
226 226 with repo_init:
227 227 pass
228 228 except pygit2.GitError:
229 229 path = wire.get('path')
230 230 tb = traceback.format_exc()
231 231 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
232 232 return False
233 233 return True
234 234
235 235 return _assert_correct_path(context_uid, repo_id, True)
236 236
237 237 @reraise_safe_exceptions
238 238 def bare(self, wire):
239 239 repo_init = self._factory.repo_libgit2(wire)
240 240 with repo_init as repo:
241 241 return repo.is_bare
242 242
243 243 @reraise_safe_exceptions
244 244 def get_node_data(self, wire, commit_id, path):
245 245 repo_init = self._factory.repo_libgit2(wire)
246 246 with repo_init as repo:
247 247 commit = repo[commit_id]
248 248 blob_obj = commit.tree[path]
249 249
250 250 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
251 251 raise exceptions.LookupException()(
252 252 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
253 253
254 254 return BytesEnvelope(blob_obj.data)
255 255
256 256 @reraise_safe_exceptions
257 257 def get_node_size(self, wire, commit_id, path):
258 258 repo_init = self._factory.repo_libgit2(wire)
259 259 with repo_init as repo:
260 260 commit = repo[commit_id]
261 261 blob_obj = commit.tree[path]
262 262
263 263 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
264 264 raise exceptions.LookupException()(
265 265 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
266 266
267 267 return blob_obj.size
268 268
269 269 @reraise_safe_exceptions
270 270 def get_node_flags(self, wire, commit_id, path):
271 271 repo_init = self._factory.repo_libgit2(wire)
272 272 with repo_init as repo:
273 273 commit = repo[commit_id]
274 274 blob_obj = commit.tree[path]
275 275
276 276 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
277 277 raise exceptions.LookupException()(
278 278 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
279 279
280 280 return blob_obj.filemode
281 281
282 282 @reraise_safe_exceptions
283 283 def get_node_is_binary(self, wire, commit_id, path):
284 284 repo_init = self._factory.repo_libgit2(wire)
285 285 with repo_init as repo:
286 286 commit = repo[commit_id]
287 287 blob_obj = commit.tree[path]
288 288
289 289 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
290 290 raise exceptions.LookupException()(
291 291 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
292 292
293 293 return blob_obj.is_binary
294 294
295 295 @reraise_safe_exceptions
296 296 def blob_as_pretty_string(self, wire, sha):
297 297 repo_init = self._factory.repo_libgit2(wire)
298 298 with repo_init as repo:
299 299 blob_obj = repo[sha]
300 300 return BytesEnvelope(blob_obj.data)
301 301
302 302 @reraise_safe_exceptions
303 303 def blob_raw_length(self, wire, sha):
304 304 cache_on, context_uid, repo_id = self._cache_on(wire)
305 305 region = self._region(wire)
306 306
307 307 @region.conditional_cache_on_arguments(condition=cache_on)
308 308 def _blob_raw_length(_repo_id, _sha):
309 309
310 310 repo_init = self._factory.repo_libgit2(wire)
311 311 with repo_init as repo:
312 312 blob = repo[sha]
313 313 return blob.size
314 314
315 315 return _blob_raw_length(repo_id, sha)
316 316
317 317 def _parse_lfs_pointer(self, raw_content):
318 318 spec_string = b'version https://git-lfs.github.com/spec'
319 319 if raw_content and raw_content.startswith(spec_string):
320 320
321 321 pattern = re.compile(rb"""
322 322 (?:\n)?
323 323 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
324 324 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
325 325 ^size[ ](?P<oid_size>[0-9]+)\n
326 326 (?:\n)?
327 327 """, re.VERBOSE | re.MULTILINE)
328 328 match = pattern.match(raw_content)
329 329 if match:
330 330 return match.groupdict()
331 331
332 332 return {}
333 333
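# An assumed example (not taken from a real repo) of the pointer format the
# regex above matches:
#
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:<64 hex chars>
#   size 12345
#
# for which _parse_lfs_pointer returns
# {'spec_ver': 'v1', 'oid_hash': '<64 hex chars>', 'oid_size': '12345'}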
334 334 @reraise_safe_exceptions
335 335 def is_large_file(self, wire, commit_id):
336 336 cache_on, context_uid, repo_id = self._cache_on(wire)
337 337 region = self._region(wire)
338 338
339 339 @region.conditional_cache_on_arguments(condition=cache_on)
340 340 def _is_large_file(_repo_id, _sha):
341 341 repo_init = self._factory.repo_libgit2(wire)
342 342 with repo_init as repo:
343 343 blob = repo[commit_id]
344 344 if blob.is_binary:
345 345 return {}
346 346
347 347 return self._parse_lfs_pointer(blob.data)
348 348
349 349 return _is_large_file(repo_id, commit_id)
350 350
351 351 @reraise_safe_exceptions
352 352 def is_binary(self, wire, tree_id):
353 353 cache_on, context_uid, repo_id = self._cache_on(wire)
354 354 region = self._region(wire)
355 355
356 356 @region.conditional_cache_on_arguments(condition=cache_on)
357 357 def _is_binary(_repo_id, _tree_id):
358 358 repo_init = self._factory.repo_libgit2(wire)
359 359 with repo_init as repo:
360 360 blob_obj = repo[tree_id]
361 361 return blob_obj.is_binary
362 362
363 363 return _is_binary(repo_id, tree_id)
364 364
365 365 @reraise_safe_exceptions
366 366 def md5_hash(self, wire, commit_id, path):
367 367 cache_on, context_uid, repo_id = self._cache_on(wire)
368 368 region = self._region(wire)
369 369
370 370 @region.conditional_cache_on_arguments(condition=cache_on)
371 371 def _md5_hash(_repo_id, _commit_id, _path):
372 372 repo_init = self._factory.repo_libgit2(wire)
373 373 with repo_init as repo:
374 374 commit = repo[_commit_id]
375 375 blob_obj = commit.tree[_path]
376 376
377 377 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
378 378 raise exceptions.LookupException()(
379 379 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
380 380
381 381 return ''
382 382
383 383 return _md5_hash(repo_id, commit_id, path)
384 384
385 385 @reraise_safe_exceptions
386 386 def in_largefiles_store(self, wire, oid):
387 387 conf = self._wire_to_config(wire)
388 388 repo_init = self._factory.repo_libgit2(wire)
389 389 with repo_init as repo:
390 390 repo_name = repo.path
391 391
392 392 store_location = conf.get('vcs_git_lfs_store_location')
393 393 if store_location:
394 394
395 395 store = LFSOidStore(
396 396 oid=oid, repo=repo_name, store_location=store_location)
397 397 return store.has_oid()
398 398
399 399 return False
400 400
401 401 @reraise_safe_exceptions
402 402 def store_path(self, wire, oid):
403 403 conf = self._wire_to_config(wire)
404 404 repo_init = self._factory.repo_libgit2(wire)
405 405 with repo_init as repo:
406 406 repo_name = repo.path
407 407
408 408 store_location = conf.get('vcs_git_lfs_store_location')
409 409 if store_location:
410 410 store = LFSOidStore(
411 411 oid=oid, repo=repo_name, store_location=store_location)
412 412 return store.oid_path
413 413 raise ValueError(f'Unable to fetch oid with path {oid}')
414 414
415 415 @reraise_safe_exceptions
416 416 def bulk_request(self, wire, rev, pre_load):
417 417 cache_on, context_uid, repo_id = self._cache_on(wire)
418 418 region = self._region(wire)
419 419
420 420 @region.conditional_cache_on_arguments(condition=cache_on)
421 421 def _bulk_request(_repo_id, _rev, _pre_load):
422 422 result = {}
423 423 for attr in pre_load:
424 424 try:
425 425 method = self._bulk_methods[attr]
426 426 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
427 427 args = [wire, rev]
428 428 result[attr] = method(*args)
429 429 except KeyError as e:
430 430 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
431 431 return result
432 432
433 433 return _bulk_request(repo_id, rev, sorted(pre_load))
434 434
435 435 @reraise_safe_exceptions
436 436 def bulk_file_request(self, wire, commit_id, path, pre_load):
437 437 cache_on, context_uid, repo_id = self._cache_on(wire)
438 438 region = self._region(wire)
439 439
440 440 @region.conditional_cache_on_arguments(condition=cache_on)
441 441 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
442 442 result = {}
443 443 for attr in pre_load:
444 444 try:
445 445 method = self._bulk_file_methods[attr]
446 446 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
447 447 result[attr] = method(wire, _commit_id, _path)
448 448 except KeyError as e:
449 449 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
450 450 return BinaryEnvelope(result)
451 451
452 452 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
453 453
454 454 def _build_opener(self, url: str):
455 455 handlers = []
456 456 url_obj = url_parser(safe_bytes(url))
457 457 authinfo = url_obj.authinfo()[1]
458 458
459 459 if authinfo:
460 460 # create a password manager
461 461 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
462 462 passmgr.add_password(*authinfo)
463 463
464 464 handlers.extend((httpbasicauthhandler(passmgr),
465 465 httpdigestauthhandler(passmgr)))
466 466
467 467 return urllib.request.build_opener(*handlers)
468 468
469 469 @reraise_safe_exceptions
470 470 def check_url(self, url, config):
471 471 url_obj = url_parser(safe_bytes(url))
472 472
473 473 test_uri = safe_str(url_obj.authinfo()[0])
474 474 obfuscated_uri = get_obfuscated_url(url_obj)
475 475
476 476 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
477 477
478 478 if not test_uri.endswith('info/refs'):
479 479 test_uri = test_uri.rstrip('/') + '/info/refs'
480 480
481 481 o = self._build_opener(test_uri)
482 482 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
483 483
484 484 q = {"service": 'git-upload-pack'}
485 485 qs = '?%s' % urllib.parse.urlencode(q)
486 cu = "{}{}".format(test_uri, qs)
486 cu = f"{test_uri}{qs}"
487 487 req = urllib.request.Request(cu, None, {})
488 488
489 489 try:
490 490 log.debug("Trying to open URL %s", obfuscated_uri)
491 491 resp = o.open(req)
492 492 if resp.code != 200:
493 493 raise exceptions.URLError()('Return Code is not 200')
494 494 except Exception as e:
495 495 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
496 496 # means it cannot be cloned
497 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
497 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
498 498
499 499 # now detect if it's proper git repo
500 500 gitdata: bytes = resp.read()
501 501
502 502 if b'service=git-upload-pack' in gitdata:
503 503 pass
504 504 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
505 505 # old style git can return some other format !
506 506 pass
507 507 else:
508 508 e = None
509 509 raise exceptions.URLError(e)(
510 510 "url [%s] does not look like an hg repo org_exc: %s"
511 511 % (obfuscated_uri, e))
512 512
513 513 return True
514 514
515 515 @reraise_safe_exceptions
516 516 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
517 517 # TODO(marcink): deprecate this method. Last I checked, we don't use it anymore
518 518 remote_refs = self.pull(wire, url, apply_refs=False)
519 519 repo = self._factory.repo(wire)
520 520 if isinstance(valid_refs, list):
521 521 valid_refs = tuple(valid_refs)
522 522
523 523 for k in remote_refs:
524 524 # only parse heads/tags and skip so called deferred tags
525 525 if k.startswith(valid_refs) and not k.endswith(deferred):
526 526 repo[k] = remote_refs[k]
527 527
528 528 if update_after_clone:
529 529 # we want to checkout HEAD
530 530 repo["HEAD"] = remote_refs["HEAD"]
531 531 index.build_index_from_tree(repo.path, repo.index_path(),
532 532 repo.object_store, repo["HEAD"].tree)
533 533
534 534 @reraise_safe_exceptions
535 535 def branch(self, wire, commit_id):
536 536 cache_on, context_uid, repo_id = self._cache_on(wire)
537 537 region = self._region(wire)
538 538
539 539 @region.conditional_cache_on_arguments(condition=cache_on)
540 540 def _branch(_context_uid, _repo_id, _commit_id):
541 541 regex = re.compile('^refs/heads')
542 542
543 543 def filter_with(ref):
544 544 return regex.match(ref[0]) and ref[1] == _commit_id
545 545
546 546 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
547 547 return [x[0].split('refs/heads/')[-1] for x in branches]
548 548
549 549 return _branch(context_uid, repo_id, commit_id)
550 550
551 551 @reraise_safe_exceptions
552 552 def commit_branches(self, wire, commit_id):
553 553 cache_on, context_uid, repo_id = self._cache_on(wire)
554 554 region = self._region(wire)
555 555
556 556 @region.conditional_cache_on_arguments(condition=cache_on)
557 557 def _commit_branches(_context_uid, _repo_id, _commit_id):
558 558 repo_init = self._factory.repo_libgit2(wire)
559 559 with repo_init as repo:
560 560 branches = [x for x in repo.branches.with_commit(_commit_id)]
561 561 return branches
562 562
563 563 return _commit_branches(context_uid, repo_id, commit_id)
564 564
565 565 @reraise_safe_exceptions
566 566 def add_object(self, wire, content):
567 567 repo_init = self._factory.repo_libgit2(wire)
568 568 with repo_init as repo:
569 569 blob = objects.Blob()
570 570 blob.set_raw_string(content)
571 571 repo.object_store.add_object(blob)
572 572 return blob.id
573 573
574 574 @reraise_safe_exceptions
575 575 def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None):
576 576 repo_init = self._factory.repo_libgit2(wire)
577 577 with repo_init as repo:
578 578
579 579 if date_args:
580 580 current_time, offset = date_args
581 581
582 582 kw = {
583 583 'time': current_time,
584 584 'offset': offset
585 585 }
586 586 author = create_signature_from_string(author, **kw)
587 587 committer = create_signature_from_string(committer, **kw)
588 588
589 589 tree = new_tree_id
590 590 if isinstance(tree, (bytes, str)):
591 591 # validate this tree is in the repo...
592 592 tree = repo[safe_str(tree)].id
593 593
594 594 parents = []
595 595 # ensure we COMMIT on top of given branch head
596 596 # check if this repo has ANY branches; otherwise it's a new-branch case we need to handle
597 597 if branch in repo.branches.local:
598 598 parents += [repo.branches[branch].target]
599 599 elif [x for x in repo.branches.local]:
600 600 parents += [repo.head.target]
601 601 #else:
602 602 # in case we want to commit on new branch we create it on top of HEAD
603 603 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
604 604
605 605 # Create a new commit
606 606 commit_oid = repo.create_commit(
607 607 f'refs/heads/{branch}', # the name of the reference to update
608 608 author, # the author of the commit
609 609 committer, # the committer of the commit
610 610 message, # the commit message
611 611 tree, # the tree produced by the index
612 612 parents # list of parents for the new commit, usually just one,
613 613 )
614 614
615 615 new_commit_id = safe_str(commit_oid)
616 616
617 617 return new_commit_id
618 618
619 619 @reraise_safe_exceptions
620 620 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
621 621
622 622 def mode2pygit(mode):
623 623 """
624 624 git only supports two filemode 644 and 755
625 625
626 626 0o100755 -> 33261
627 627 0o100644 -> 33188
628 628 """
629 629 return {
630 630 0o100644: pygit2.GIT_FILEMODE_BLOB,
631 631 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
632 632 0o120000: pygit2.GIT_FILEMODE_LINK
633 633 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
634 634
635 635 repo_init = self._factory.repo_libgit2(wire)
636 636 with repo_init as repo:
637 637 repo_index = repo.index
638 638
639 639 for pathspec in updated:
640 640 blob_id = repo.create_blob(pathspec['content'])
641 641 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
642 642 repo_index.add(ie)
643 643
644 644 for pathspec in removed:
645 645 repo_index.remove(pathspec)
646 646
647 647 # Write changes to the index
648 648 repo_index.write()
649 649
650 650 # Create a tree from the updated index
651 651 commit_tree = repo_index.write_tree()
652 652
653 653 new_tree_id = commit_tree
654 654
655 655 author = commit_data['author']
656 656 committer = commit_data['committer']
657 657 message = commit_data['message']
658 658
659 659 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
660 660
661 661 new_commit_id = self.create_commit(wire, author, committer, message, branch,
662 662 new_tree_id, date_args=date_args)
663 663
664 664 # libgit2, ensure the branch is there and exists
665 665 self.create_branch(wire, branch, new_commit_id)
666 666
667 667 # libgit2, set new ref to this created commit
668 668 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
669 669
670 670 return new_commit_id
671 671
672 672 @reraise_safe_exceptions
673 673 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
674 674 if url != 'default' and '://' not in url:
675 675 client = LocalGitClient(url)
676 676 else:
677 677 url_obj = url_parser(safe_bytes(url))
678 678 o = self._build_opener(url)
679 679 url = url_obj.authinfo()[0]
680 680 client = HttpGitClient(base_url=url, opener=o)
681 681 repo = self._factory.repo(wire)
682 682
683 683 determine_wants = repo.object_store.determine_wants_all
684 684 if refs:
685 685 refs = [ascii_bytes(x) for x in refs]
686 686
687 687 def determine_wants_requested(remote_refs):
688 688 determined = []
689 689 for ref_name, ref_hash in remote_refs.items():
690 690 bytes_ref_name = safe_bytes(ref_name)
691 691
692 692 if bytes_ref_name in refs:
693 693 bytes_ref_hash = safe_bytes(ref_hash)
694 694 determined.append(bytes_ref_hash)
695 695 return determined
696 696
697 697 # swap with our custom requested wants
698 698 determine_wants = determine_wants_requested
699 699
700 700 try:
701 701 remote_refs = client.fetch(
702 702 path=url, target=repo, determine_wants=determine_wants)
703 703
704 704 except NotGitRepository as e:
705 705 log.warning(
706 706 'Trying to fetch from "%s" failed, not a Git repository.', url)
707 707 # Exception can contain unicode which we convert
708 708 raise exceptions.AbortException(e)(repr(e))
709 709
710 710 # mikhail: client.fetch() returns all the remote refs, but fetches only
711 711 # refs filtered by `determine_wants` function. We need to filter result
712 712 # as well
713 713 if refs:
714 714 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
715 715
716 716 if apply_refs:
717 717 # TODO: johbo: Needs proper test coverage with a git repository
718 718 # that contains a tag object, so that we would end up with
719 719 # a peeled ref at this point.
720 720 for k in remote_refs:
721 721 if k.endswith(PEELED_REF_MARKER):
722 722 log.debug("Skipping peeled reference %s", k)
723 723 continue
724 724 repo[k] = remote_refs[k]
725 725
726 726 if refs and not update_after:
727 727 # mikhail: explicitly set the head to the last ref.
728 728 repo[HEAD_MARKER] = remote_refs[refs[-1]]
729 729
730 730 if update_after:
731 731 # we want to check out HEAD
732 732 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
733 733 index.build_index_from_tree(repo.path, repo.index_path(),
734 734 repo.object_store, repo[HEAD_MARKER].tree)
735 735
736 736 if isinstance(remote_refs, FetchPackResult):
737 737 return remote_refs.refs
738 738 return remote_refs
739 739
740 740 @reraise_safe_exceptions
741 741 def sync_fetch(self, wire, url, refs=None, all_refs=False):
742 742 self._factory.repo(wire)
743 743 if refs and not isinstance(refs, (list, tuple)):
744 744 refs = [refs]
745 745
746 746 config = self._wire_to_config(wire)
747 747 # get all remote refs we'll use to fetch later
748 748 cmd = ['ls-remote']
749 749 if not all_refs:
750 750 cmd += ['--heads', '--tags']
751 751 cmd += [url]
752 752 output, __ = self.run_git_command(
753 753 wire, cmd, fail_on_stderr=False,
754 754 _copts=self._remote_conf(config),
755 755 extra_env={'GIT_TERMINAL_PROMPT': '0'})
756 756
757 757 remote_refs = collections.OrderedDict()
758 758 fetch_refs = []
759 759
760 760 for ref_line in output.splitlines():
761 761 sha, ref = ref_line.split(b'\t')
762 762 sha = sha.strip()
763 763 if ref in remote_refs:
764 764 # duplicate, skip
765 765 continue
766 766 if ref.endswith(PEELED_REF_MARKER):
767 767 log.debug("Skipping peeled reference %s", ref)
768 768 continue
769 769 # don't sync HEAD
770 770 if ref in [HEAD_MARKER]:
771 771 continue
772 772
773 773 remote_refs[ref] = sha
774 774
775 775 if refs and sha in refs:
776 776 # we filter fetch using our specified refs
777 777 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
778 778 elif not refs:
779 779 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
780 780 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
781 781
782 782 if fetch_refs:
783 783 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
784 784 fetch_refs_chunks = list(chunk)
785 785 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
786 786 self.run_git_command(
787 787 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
788 788 fail_on_stderr=False,
789 789 _copts=self._remote_conf(config),
790 790 extra_env={'GIT_TERMINAL_PROMPT': '0'})
791 791
792 792 return remote_refs
793 793
794 794 @reraise_safe_exceptions
795 795 def sync_push(self, wire, url, refs=None):
796 796 if not self.check_url(url, wire):
797 797 return
798 798 config = self._wire_to_config(wire)
799 799 self._factory.repo(wire)
800 800 self.run_git_command(
801 801 wire, ['push', url, '--mirror'], fail_on_stderr=False,
802 802 _copts=self._remote_conf(config),
803 803 extra_env={'GIT_TERMINAL_PROMPT': '0'})
804 804
805 805 @reraise_safe_exceptions
806 806 def get_remote_refs(self, wire, url):
807 807 repo = Repo(url)
808 808 return repo.get_refs()
809 809
810 810 @reraise_safe_exceptions
811 811 def get_description(self, wire):
812 812 repo = self._factory.repo(wire)
813 813 return repo.get_description()
814 814
815 815 @reraise_safe_exceptions
816 816 def get_missing_revs(self, wire, rev1, rev2, path2):
817 817 repo = self._factory.repo(wire)
818 818 LocalGitClient(thin_packs=False).fetch(path2, repo)
819 819
820 820 wire_remote = wire.copy()
821 821 wire_remote['path'] = path2
822 822 repo_remote = self._factory.repo(wire_remote)
823 823 LocalGitClient(thin_packs=False).fetch(path2, repo_remote)
824 824
825 825 revs = [
826 826 x.commit.id
827 827 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
828 828 return revs
829 829
830 830 @reraise_safe_exceptions
831 831 def get_object(self, wire, sha, maybe_unreachable=False):
832 832 cache_on, context_uid, repo_id = self._cache_on(wire)
833 833 region = self._region(wire)
834 834
835 835 @region.conditional_cache_on_arguments(condition=cache_on)
836 836 def _get_object(_context_uid, _repo_id, _sha):
837 837 repo_init = self._factory.repo_libgit2(wire)
838 838 with repo_init as repo:
839 839
840 840 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
841 841 try:
842 842 commit = repo.revparse_single(sha)
843 843 except KeyError:
844 844 # NOTE(marcink): KeyError doesn't give us any meaningful information
845 845 # here, we instead give something more explicit
846 846 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
847 847 raise exceptions.LookupException(e)(missing_commit_err)
848 848 except ValueError as e:
849 849 raise exceptions.LookupException(e)(missing_commit_err)
850 850
851 851 is_tag = False
852 852 if isinstance(commit, pygit2.Tag):
853 853 commit = repo.get(commit.target)
854 854 is_tag = True
855 855
856 856 check_dangling = True
857 857 if is_tag:
858 858 check_dangling = False
859 859
860 860 if check_dangling and maybe_unreachable:
861 861 check_dangling = False
862 862
863 863 # we used a reference and it parsed, which means we don't have a dangling commit
864 864 if sha != commit.hex:
865 865 check_dangling = False
866 866
867 867 if check_dangling:
868 868 # check for dangling commit
869 869 for branch in repo.branches.with_commit(commit.hex):
870 870 if branch:
871 871 break
872 872 else:
873 873 # NOTE(marcink): Empty error doesn't give us any meaningful information
874 874 # here, we instead give something more explicit
875 875 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
876 876 raise exceptions.LookupException(e)(missing_commit_err)
877 877
878 878 commit_id = commit.hex
879 879 type_str = commit.type_str
880 880
881 881 return {
882 882 'id': commit_id,
883 883 'type': type_str,
884 884 'commit_id': commit_id,
885 885 'idx': 0
886 886 }
887 887
888 888 return _get_object(context_uid, repo_id, sha)
889 889
890 890 @reraise_safe_exceptions
891 891 def get_refs(self, wire):
892 892 cache_on, context_uid, repo_id = self._cache_on(wire)
893 893 region = self._region(wire)
894 894
895 895 @region.conditional_cache_on_arguments(condition=cache_on)
896 896 def _get_refs(_context_uid, _repo_id):
897 897
898 898 repo_init = self._factory.repo_libgit2(wire)
899 899 with repo_init as repo:
900 900 regex = re.compile('^refs/(heads|tags)/')
901 901 return {x.name: x.target.hex for x in
902 902 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
903 903
904 904 return _get_refs(context_uid, repo_id)
905 905
906 906 @reraise_safe_exceptions
907 907 def get_branch_pointers(self, wire):
908 908 cache_on, context_uid, repo_id = self._cache_on(wire)
909 909 region = self._region(wire)
910 910
911 911 @region.conditional_cache_on_arguments(condition=cache_on)
912 912 def _get_branch_pointers(_context_uid, _repo_id):
913 913
914 914 repo_init = self._factory.repo_libgit2(wire)
915 915 regex = re.compile('^refs/heads')
916 916 with repo_init as repo:
917 917 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
918 918 return {x.target.hex: x.shorthand for x in branches}
919 919
920 920 return _get_branch_pointers(context_uid, repo_id)
921 921
922 922 @reraise_safe_exceptions
923 923 def head(self, wire, show_exc=True):
924 924 cache_on, context_uid, repo_id = self._cache_on(wire)
925 925 region = self._region(wire)
926 926
927 927 @region.conditional_cache_on_arguments(condition=cache_on)
928 928 def _head(_context_uid, _repo_id, _show_exc):
929 929 repo_init = self._factory.repo_libgit2(wire)
930 930 with repo_init as repo:
931 931 try:
932 932 return repo.head.peel().hex
933 933 except Exception:
934 934 if show_exc:
935 935 raise
936 936 return _head(context_uid, repo_id, show_exc)
937 937
938 938 @reraise_safe_exceptions
939 939 def init(self, wire):
940 940 repo_path = safe_str(wire['path'])
941 941 self.repo = Repo.init(repo_path)
942 942
943 943 @reraise_safe_exceptions
944 944 def init_bare(self, wire):
945 945 repo_path = safe_str(wire['path'])
946 946 self.repo = Repo.init_bare(repo_path)
947 947
948 948 @reraise_safe_exceptions
949 949 def revision(self, wire, rev):
950 950
951 951 cache_on, context_uid, repo_id = self._cache_on(wire)
952 952 region = self._region(wire)
953 953
954 954 @region.conditional_cache_on_arguments(condition=cache_on)
955 955 def _revision(_context_uid, _repo_id, _rev):
956 956 repo_init = self._factory.repo_libgit2(wire)
957 957 with repo_init as repo:
958 958 commit = repo[rev]
959 959 obj_data = {
960 960 'id': commit.id.hex,
961 961 }
962 962 # tree objects themselves don't have a tree_id attribute
963 963 if hasattr(commit, 'tree_id'):
964 964 obj_data['tree'] = commit.tree_id.hex
965 965
966 966 return obj_data
967 967 return _revision(context_uid, repo_id, rev)
968 968
969 969 @reraise_safe_exceptions
970 970 def date(self, wire, commit_id):
971 971 cache_on, context_uid, repo_id = self._cache_on(wire)
972 972 region = self._region(wire)
973 973
974 974 @region.conditional_cache_on_arguments(condition=cache_on)
975 975 def _date(_repo_id, _commit_id):
976 976 repo_init = self._factory.repo_libgit2(wire)
977 977 with repo_init as repo:
978 978 commit = repo[commit_id]
979 979
980 980 if hasattr(commit, 'commit_time'):
981 981 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
982 982 else:
983 983 commit = commit.get_object()
984 984 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
985 985
986 986 # TODO(marcink): check dulwich difference of offset vs timezone
987 987 return [commit_time, commit_time_offset]
988 988 return _date(repo_id, commit_id)
989 989
990 990 @reraise_safe_exceptions
991 991 def author(self, wire, commit_id):
992 992 cache_on, context_uid, repo_id = self._cache_on(wire)
993 993 region = self._region(wire)
994 994
995 995 @region.conditional_cache_on_arguments(condition=cache_on)
996 996 def _author(_repo_id, _commit_id):
997 997 repo_init = self._factory.repo_libgit2(wire)
998 998 with repo_init as repo:
999 999 commit = repo[commit_id]
1000 1000
1001 1001 if hasattr(commit, 'author'):
1002 1002 author = commit.author
1003 1003 else:
1004 1004 author = commit.get_object().author
1005 1005
1006 1006 if author.email:
1007 1007 return f"{author.name} <{author.email}>"
1008 1008
1009 1009 try:
1010 1010 return f"{author.name}"
1011 1011 except Exception:
1012 1012 return f"{safe_str(author.raw_name)}"
1013 1013
1014 1014 return _author(repo_id, commit_id)
1015 1015
1016 1016 @reraise_safe_exceptions
1017 1017 def message(self, wire, commit_id):
1018 1018 cache_on, context_uid, repo_id = self._cache_on(wire)
1019 1019 region = self._region(wire)
1020 1020
1021 1021 @region.conditional_cache_on_arguments(condition=cache_on)
1022 1022 def _message(_repo_id, _commit_id):
1023 1023 repo_init = self._factory.repo_libgit2(wire)
1024 1024 with repo_init as repo:
1025 1025 commit = repo[commit_id]
1026 1026 return commit.message
1027 1027 return _message(repo_id, commit_id)
1028 1028
1029 1029 @reraise_safe_exceptions
1030 1030 def parents(self, wire, commit_id):
1031 1031 cache_on, context_uid, repo_id = self._cache_on(wire)
1032 1032 region = self._region(wire)
1033 1033
1034 1034 @region.conditional_cache_on_arguments(condition=cache_on)
1035 1035 def _parents(_repo_id, _commit_id):
1036 1036 repo_init = self._factory.repo_libgit2(wire)
1037 1037 with repo_init as repo:
1038 1038 commit = repo[commit_id]
1039 1039 if hasattr(commit, 'parent_ids'):
1040 1040 parent_ids = commit.parent_ids
1041 1041 else:
1042 1042 parent_ids = commit.get_object().parent_ids
1043 1043
1044 1044 return [x.hex for x in parent_ids]
1045 1045 return _parents(repo_id, commit_id)
1046 1046
1047 1047 @reraise_safe_exceptions
1048 1048 def children(self, wire, commit_id):
1049 1049 cache_on, context_uid, repo_id = self._cache_on(wire)
1050 1050 region = self._region(wire)
1051 1051
1052 1052 head = self.head(wire)
1053 1053
1054 1054 @region.conditional_cache_on_arguments(condition=cache_on)
1055 1055 def _children(_repo_id, _commit_id):
1056 1056
1057 1057 output, __ = self.run_git_command(
1058 1058 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
1059 1059
1060 1060 child_ids = []
1061 1061 pat = re.compile(fr'^{commit_id}')
1062 1062 for line in output.splitlines():
1063 1063 line = safe_str(line)
1064 1064 if pat.match(line):
1065 1065 found_ids = line.split(' ')[1:]
1066 1066 child_ids.extend(found_ids)
1067 1067 break
1068 1068
1069 1069 return child_ids
1070 1070 return _children(repo_id, commit_id)
1071 1071
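The `--children` parsing above is easiest to see on concrete output; a small standalone sketch with made-up, shortened SHAs:

```python
# rev-list --children prints '<commit> <child-1> <child-2> ...' per line;
# the loop above keeps the tail of the first line starting with commit_id.
commit_id = 'feedface'
output = b'feedface deadbee1 deadbee2\ncafebabe feedface\n'

child_ids = []
for line in output.splitlines():
    line = line.decode()
    if line.startswith(commit_id):
        child_ids = line.split(' ')[1:]  # ['deadbee1', 'deadbee2']
        break
```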
1072 1072 @reraise_safe_exceptions
1073 1073 def set_refs(self, wire, key, value):
1074 1074 repo_init = self._factory.repo_libgit2(wire)
1075 1075 with repo_init as repo:
1076 1076 repo.references.create(key, value, force=True)
1077 1077
1078 1078 @reraise_safe_exceptions
1079 1079 def create_branch(self, wire, branch_name, commit_id, force=False):
1080 1080 repo_init = self._factory.repo_libgit2(wire)
1081 1081 with repo_init as repo:
1082 1082 if commit_id:
1083 1083 commit = repo[commit_id]
1084 1084 else:
1085 1085 # if commit is not given, just use HEAD
1086 1086 commit = repo.head()
1087 1087
1088 1088 if force:
1089 1089 repo.branches.local.create(branch_name, commit, force=force)
1090 1090 elif not repo.branches.get(branch_name):
1091 1091 # create only if the branch doesn't already exist
1092 1092 repo.branches.local.create(branch_name, commit, force=force)
1093 1093
1094 1094 @reraise_safe_exceptions
1095 1095 def remove_ref(self, wire, key):
1096 1096 repo_init = self._factory.repo_libgit2(wire)
1097 1097 with repo_init as repo:
1098 1098 repo.references.delete(key)
1099 1099
1100 1100 @reraise_safe_exceptions
1101 1101 def tag_remove(self, wire, tag_name):
1102 1102 repo_init = self._factory.repo_libgit2(wire)
1103 1103 with repo_init as repo:
1104 1104 key = f'refs/tags/{tag_name}'
1105 1105 repo.references.delete(key)
1106 1106
1107 1107 @reraise_safe_exceptions
1108 1108 def tree_changes(self, wire, source_id, target_id):
1109 1109 repo = self._factory.repo(wire)
1110 1110 # source can be empty
1111 1111 source_id = safe_bytes(source_id if source_id else b'')
1112 1112 target_id = safe_bytes(target_id)
1113 1113
1114 1114 source = repo[source_id].tree if source_id else None
1115 1115 target = repo[target_id].tree
1116 1116 result = repo.object_store.tree_changes(source, target)
1117 1117
1118 1118 added = set()
1119 1119 modified = set()
1120 1120 deleted = set()
1121 1121 for (old_path, new_path), (_, _), (_, _) in list(result):
1122 1122 if new_path and old_path:
1123 1123 modified.add(new_path)
1124 1124 elif new_path and not old_path:
1125 1125 added.add(new_path)
1126 1126 elif not new_path and old_path:
1127 1127 deleted.add(old_path)
1128 1128
1129 1129 return list(added), list(modified), list(deleted)
1130 1130
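The added/modified/deleted split follows directly from which side of the `(old_path, new_path)` pair is set; a sketch with hypothetical change tuples in dulwich's `((old_path, new_path), (old_mode, new_mode), (old_sha, new_sha))` shape:

```python
changes = [
    ((b'a.txt', b'a.txt'), (0o100644, 0o100644), (b'sha1', b'sha2')),  # modified
    ((None, b'b.txt'), (None, 0o100644), (None, b'sha3')),             # added
    ((b'c.txt', None), (0o100644, None), (b'sha4', None)),             # deleted
]
for (old_path, new_path), _modes, _shas in changes:
    if old_path and new_path:
        kind = 'modified'
    elif new_path:
        kind = 'added'
    else:
        kind = 'deleted'
```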
1131 1131 @reraise_safe_exceptions
1132 1132 def tree_and_type_for_path(self, wire, commit_id, path):
1133 1133
1134 1134 cache_on, context_uid, repo_id = self._cache_on(wire)
1135 1135 region = self._region(wire)
1136 1136
1137 1137 @region.conditional_cache_on_arguments(condition=cache_on)
1138 1138 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1139 1139 repo_init = self._factory.repo_libgit2(wire)
1140 1140
1141 1141 with repo_init as repo:
1142 1142 commit = repo[commit_id]
1143 1143 try:
1144 1144 tree = commit.tree[path]
1145 1145 except KeyError:
1146 1146 return None, None, None
1147 1147
1148 1148 return tree.id.hex, tree.type_str, tree.filemode
1149 1149 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1150 1150
1151 1151 @reraise_safe_exceptions
1152 1152 def tree_items(self, wire, tree_id):
1153 1153 cache_on, context_uid, repo_id = self._cache_on(wire)
1154 1154 region = self._region(wire)
1155 1155
1156 1156 @region.conditional_cache_on_arguments(condition=cache_on)
1157 1157 def _tree_items(_repo_id, _tree_id):
1158 1158
1159 1159 repo_init = self._factory.repo_libgit2(wire)
1160 1160 with repo_init as repo:
1161 1161 try:
1162 1162 tree = repo[tree_id]
1163 1163 except KeyError:
1164 1164 raise ObjectMissing(f'No tree with id: {tree_id}')
1165 1165
1166 1166 result = []
1167 1167 for item in tree:
1168 1168 item_sha = item.hex
1169 1169 item_mode = item.filemode
1170 1170 item_type = item.type_str
1171 1171
1172 1172 if item_type == 'commit':
1173 1173 # NOTE(marcink): we translate submodules to 'link' for backward compat
1174 1174 item_type = 'link'
1175 1175
1176 1176 result.append((item.name, item_mode, item_sha, item_type))
1177 1177 return result
1178 1178 return _tree_items(repo_id, tree_id)
1179 1179
1180 1180 @reraise_safe_exceptions
1181 1181 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1182 1182 """
1183 1183 Old version that uses subprocess to call diff
1184 1184 """
1185 1185
1186 1186 flags = [
1187 1187 '-U%s' % context, '--patch',
1188 1188 '--binary',
1189 1189 '--find-renames',
1190 1190 '--no-indent-heuristic',
1191 1191 # '--indent-heuristic',
1192 1192 #'--full-index',
1193 1193 #'--abbrev=40'
1194 1194 ]
1195 1195
1196 1196 if opt_ignorews:
1197 1197 flags.append('--ignore-all-space')
1198 1198
1199 1199 if commit_id_1 == self.EMPTY_COMMIT:
1200 1200 cmd = ['show'] + flags + [commit_id_2]
1201 1201 else:
1202 1202 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1203 1203
1204 1204 if file_filter:
1205 1205 cmd.extend(['--', file_filter])
1206 1206
1207 1207 diff, __ = self.run_git_command(wire, cmd)
1208 1208 # If we used 'show' command, strip first few lines (until actual diff
1209 1209 # starts)
1210 1210 if commit_id_1 == self.EMPTY_COMMIT:
1211 1211 lines = diff.splitlines()
1212 1212 x = 0
1213 1213 for line in lines:
1214 1214 if line.startswith(b'diff'):
1215 1215 break
1216 1216 x += 1
1217 1217 # Append a trailing newline just like the 'diff' command does
1218 1218 diff = b'\n'.join(lines[x:]) + b'\n'
1219 1219 return diff
1220 1220
1221 1221 @reraise_safe_exceptions
1222 1222 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1223 1223 repo_init = self._factory.repo_libgit2(wire)
1224 1224
1225 1225 with repo_init as repo:
1226 1226 swap = True
1227 1227 flags = 0
1228 1228 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1229 1229
1230 1230 if opt_ignorews:
1231 1231 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1232 1232
1233 1233 if commit_id_1 == self.EMPTY_COMMIT:
1234 1234 comm1 = repo[commit_id_2]
1235 1235 diff_obj = comm1.tree.diff_to_tree(
1236 1236 flags=flags, context_lines=context, swap=swap)
1237 1237
1238 1238 else:
1239 1239 comm1 = repo[commit_id_2]
1240 1240 comm2 = repo[commit_id_1]
1241 1241 diff_obj = comm1.tree.diff_to_tree(
1242 1242 comm2.tree, flags=flags, context_lines=context, swap=swap)
1243 1243 similar_flags = 0
1244 1244 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1245 1245 diff_obj.find_similar(flags=similar_flags)
1246 1246
1247 1247 if file_filter:
1248 1248 for p in diff_obj:
1249 1249 if p.delta.old_file.path == file_filter:
1250 1250 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1251 1251 # no matching path == no diff
1252 1252 return BytesEnvelope(b'')
1253 1253
1254 1254 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1255 1255
1256 1256 @reraise_safe_exceptions
1257 1257 def node_history(self, wire, commit_id, path, limit):
1258 1258 cache_on, context_uid, repo_id = self._cache_on(wire)
1259 1259 region = self._region(wire)
1260 1260
1261 1261 @region.conditional_cache_on_arguments(condition=cache_on)
1262 1262 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1263 1263 # optimize for n==1, rev-list is much faster for that use-case
1264 1264 if limit == 1:
1265 1265 cmd = ['rev-list', '-1', commit_id, '--', path]
1266 1266 else:
1267 1267 cmd = ['log']
1268 1268 if limit:
1269 1269 cmd.extend(['-n', str(safe_int(limit, 0))])
1270 1270 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1271 1271
1272 1272 output, __ = self.run_git_command(wire, cmd)
1273 1273 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1274 1274
1275 1275 return commit_ids
1276 1276 return _node_history(context_uid, repo_id, commit_id, path, limit)
1277 1277
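For reference, the two command shapes the cached helper above ends up running (arguments shown as placeholders):

```python
# limit == 1 (fast path):
#   git rev-list -1 <commit_id> -- <path>
# otherwise:
#   git log -n <limit> --pretty=format: %H -s <commit_id> -- <path>
# the 40-hex SHAs are then scraped from stdout with re.findall
```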
1278 1278 @reraise_safe_exceptions
1279 1279 def node_annotate_legacy(self, wire, commit_id, path):
1280 1280 # note: replaced by pygit2 implementation
1281 1281 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1282 1282 # -l ==> outputs long shas (and we need all 40 characters)
1283 1283 # --root ==> doesn't put '^' character for boundaries
1284 1284 # -r commit_id ==> blames for the given commit
1285 1285 output, __ = self.run_git_command(wire, cmd)
1286 1286
1287 1287 result = []
1288 1288 for i, blame_line in enumerate(output.splitlines()[:-1]):
1289 1289 line_no = i + 1
1290 1290 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1291 1291 result.append((line_no, blame_commit_id, line))
1292 1292
1293 1293 return result
1294 1294
1295 1295 @reraise_safe_exceptions
1296 1296 def node_annotate(self, wire, commit_id, path):
1297 1297
1298 1298 result_libgit = []
1299 1299 repo_init = self._factory.repo_libgit2(wire)
1300 1300 with repo_init as repo:
1301 1301 commit = repo[commit_id]
1302 1302 blame_obj = repo.blame(path, newest_commit=commit_id)
1303 1303 for i, line in enumerate(commit.tree[path].data.splitlines()):
1304 1304 line_no = i + 1
1305 1305 hunk = blame_obj.for_line(line_no)
1306 1306 blame_commit_id = hunk.final_commit_id.hex
1307 1307
1308 1308 result_libgit.append((line_no, blame_commit_id, line))
1309 1309
1310 1310 return result_libgit
1311 1311
1312 1312 @reraise_safe_exceptions
1313 1313 def update_server_info(self, wire):
1314 1314 repo = self._factory.repo(wire)
1315 1315 update_server_info(repo)
1316 1316
1317 1317 @reraise_safe_exceptions
1318 1318 def get_all_commit_ids(self, wire):
1319 1319
1320 1320 cache_on, context_uid, repo_id = self._cache_on(wire)
1321 1321 region = self._region(wire)
1322 1322
1323 1323 @region.conditional_cache_on_arguments(condition=cache_on)
1324 1324 def _get_all_commit_ids(_context_uid, _repo_id):
1325 1325
1326 1326 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1327 1327 try:
1328 1328 output, __ = self.run_git_command(wire, cmd)
1329 1329 return output.splitlines()
1330 1330 except Exception:
1331 1331 # Can be raised for empty repositories
1332 1332 return []
1333 1333
1334 1334 @region.conditional_cache_on_arguments(condition=cache_on)
1335 1335 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1336 1336 repo_init = self._factory.repo_libgit2(wire)
1337 1337 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1338 1338 results = []
1339 1339 with repo_init as repo:
1340 1340 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1341 1341 results.append(commit.id.hex)
1342 1342 return results
1343 1343 return _get_all_commit_ids(context_uid, repo_id)
1344 1344
1345 1345 @reraise_safe_exceptions
1346 1346 def run_git_command(self, wire, cmd, **opts):
1347 1347 path = wire.get('path', None)
1348 1348
1349 1349 if path and os.path.isdir(path):
1350 1350 opts['cwd'] = path
1351 1351
1352 1352 if '_bare' in opts:
1353 1353 _copts = []
1354 1354 del opts['_bare']
1355 1355 else:
1356 1356 _copts = ['-c', 'core.quotepath=false',]
1357 1357 safe_call = False
1358 1358 if '_safe' in opts:
1359 1359 # no exc on failure
1360 1360 del opts['_safe']
1361 1361 safe_call = True
1362 1362
1363 1363 if '_copts' in opts:
1364 1364 _copts.extend(opts['_copts'] or [])
1365 1365 del opts['_copts']
1366 1366
1367 1367 gitenv = os.environ.copy()
1368 1368 gitenv.update(opts.pop('extra_env', {}))
1369 1369 # drop GIT_DIR from the env, it would interfere with the command
1370 1370 if 'GIT_DIR' in gitenv:
1371 1371 del gitenv['GIT_DIR']
1372 1372 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1373 1373 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1374 1374
1375 1375 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1376 1376 _opts = {'env': gitenv, 'shell': False}
1377 1377
1378 1378 proc = None
1379 1379 try:
1380 1380 _opts.update(opts)
1381 1381 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1382 1382
1383 1383 return b''.join(proc), b''.join(proc.stderr)
1384 1384 except OSError as err:
1385 1385 cmd = ' '.join(map(safe_str, cmd)) # human-friendly CMD
1386 1386 tb_err = ("Couldn't run git command (%s).\n"
1387 1387 "Original error was: %s\n"
1388 1388 "Call options: %s\n"
1389 1389 % (cmd, err, _opts))
1390 1390 log.exception(tb_err)
1391 1391 if safe_call:
1392 1392 return b'', err
1393 1393 else:
1394 1394 raise exceptions.VcsException()(tb_err)
1395 1395 finally:
1396 1396 if proc:
1397 1397 proc.close()
1398 1398
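A hedged usage sketch of the underscore-prefixed options that `run_git_command` strips out of `opts` before spawning the process (the `remote` instance and `wire` dict are assumed):

```python
# default call: prepends ['-c', 'core.quotepath=false'] and raises VcsException on failure
stdout, stderr = remote.run_git_command(wire, ['rev-parse', 'HEAD'])

# _bare=True  -> skip the default -c options
# _safe=True  -> swallow OSError and return (b'', err) instead of raising
# _copts      -> extra git -c options; extra_env -> merged into the environment
stdout, stderr = remote.run_git_command(
    wire, ['log', '-1'],
    _bare=True, _safe=True,
    _copts=['-c', 'color.ui=false'],
    extra_env={'GIT_TRACE': '0'})
```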
1399 1399 @reraise_safe_exceptions
1400 1400 def install_hooks(self, wire, force=False):
1401 1401 from vcsserver.hook_utils import install_git_hooks
1402 1402 bare = self.bare(wire)
1403 1403 path = wire['path']
1404 1404 binary_dir = settings.BINARY_DIR
1405 1405 if binary_dir:
1406 1406 os.path.join(binary_dir, 'python3') # NOTE: result currently unused
1407 1407 return install_git_hooks(path, bare, force_create=force)
1408 1408
1409 1409 @reraise_safe_exceptions
1410 1410 def get_hooks_info(self, wire):
1411 1411 from vcsserver.hook_utils import (
1412 1412 get_git_pre_hook_version, get_git_post_hook_version)
1413 1413 bare = self.bare(wire)
1414 1414 path = wire['path']
1415 1415 return {
1416 1416 'pre_version': get_git_pre_hook_version(path, bare),
1417 1417 'post_version': get_git_post_hook_version(path, bare),
1418 1418 }
1419 1419
1420 1420 @reraise_safe_exceptions
1421 1421 def set_head_ref(self, wire, head_name):
1422 1422 log.debug('Setting refs/heads to `%s`', head_name)
1423 1423 repo_init = self._factory.repo_libgit2(wire)
1424 1424 with repo_init as repo:
1425 1425 repo.set_head(f'refs/heads/{head_name}')
1426 1426
1427 1427 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1428 1428
1429 1429 @reraise_safe_exceptions
1430 1430 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1431 1431 archive_dir_name, commit_id, cache_config):
1432 1432
1433 1433 def file_walker(_commit_id, path):
1434 1434 repo_init = self._factory.repo_libgit2(wire)
1435 1435
1436 1436 with repo_init as repo:
1437 1437 commit = repo[commit_id]
1438 1438
1439 1439 if path in ['', '/']:
1440 1440 tree = commit.tree
1441 1441 else:
1442 1442 tree = commit.tree[path.rstrip('/')]
1443 1443 tree_id = tree.id.hex
1444 1444 try:
1445 1445 tree = repo[tree_id]
1446 1446 except KeyError:
1447 1447 raise ObjectMissing(f'No tree with id: {tree_id}')
1448 1448
1449 1449 index = LibGit2Index.Index()
1450 1450 index.read_tree(tree)
1451 1451 file_iter = index
1452 1452
1453 1453 for file_node in file_iter:
1454 1454 file_path = file_node.path
1455 1455 mode = file_node.mode
1456 1456 is_link = stat.S_ISLNK(mode)
1457 1457 if mode == pygit2.GIT_FILEMODE_COMMIT:
1458 1458 log.debug('Skipping path %s as a commit node', file_path)
1459 1459 continue
1460 1460 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1461 1461
1462 1462 return store_archive_in_cache(
1463 1463 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
@@ -1,1159 +1,1159 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import binascii
18 18 import io
19 19 import logging
20 20 import stat
21 21 import urllib.request
22 22 import urllib.parse
23 23 import traceback
24 24 import hashlib
25 25
26 26 from hgext import largefiles, rebase, purge
27 27
28 28 from mercurial import commands
29 29 from mercurial import unionrepo
30 30 from mercurial import verify
31 31 from mercurial import repair
32 32
33 33 import vcsserver
34 34 from vcsserver import exceptions
35 35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, store_archive_in_cache, ArchiveNode, BytesEnvelope, \
36 36 BinaryEnvelope
37 37 from vcsserver.hgcompat import (
38 38 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
39 39 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
40 40 makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
41 41 patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
42 42 RepoLookupError, InterventionRequired, RequirementError,
43 43 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
44 44 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
45 45 from vcsserver.vcs_base import RemoteBase
46 46 from vcsserver.config import hooks as hooks_config
47 47
48 48
49 49 log = logging.getLogger(__name__)
50 50
51 51
52 52 def make_ui_from_config(repo_config):
53 53
54 54 class LoggingUI(ui.ui):
55 55
56 56 def status(self, *msg, **opts):
57 57 str_msg = map(safe_str, msg)
58 58 log.info(' '.join(str_msg).rstrip('\n'))
59 59 #super(LoggingUI, self).status(*msg, **opts)
60 60
61 61 def warn(self, *msg, **opts):
62 62 str_msg = map(safe_str, msg)
63 63 log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
64 64 #super(LoggingUI, self).warn(*msg, **opts)
65 65
66 66 def error(self, *msg, **opts):
67 67 str_msg = map(safe_str, msg)
68 68 log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
69 69 #super(LoggingUI, self).error(*msg, **opts)
70 70
71 71 def note(self, *msg, **opts):
72 72 str_msg = map(safe_str, msg)
73 73 log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
74 74 #super(LoggingUI, self).note(*msg, **opts)
75 75
76 76 def debug(self, *msg, **opts):
77 77 str_msg = map(safe_str, msg)
78 78 log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
79 79 #super(LoggingUI, self).debug(*msg, **opts)
80 80
81 81 baseui = LoggingUI()
82 82
83 83 # clean the baseui object
84 84 baseui._ocfg = hgconfig.config()
85 85 baseui._ucfg = hgconfig.config()
86 86 baseui._tcfg = hgconfig.config()
87 87
88 88 for section, option, value in repo_config:
89 89 baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
90 90
91 91 # make our hgweb quiet so it doesn't print output
92 92 baseui.setconfig(b'ui', b'quiet', b'true')
93 93
94 94 baseui.setconfig(b'ui', b'paginate', b'never')
95 95 # for better Error reporting of Mercurial
96 96 baseui.setconfig(b'ui', b'message-output', b'stderr')
97 97
98 98 # force mercurial to only use 1 thread, otherwise it may try to set a
99 99 # signal in a non-main thread, thus generating a ValueError.
100 100 baseui.setconfig(b'worker', b'numcpus', 1)
101 101
102 102 # If there is no config for the largefiles extension, we explicitly disable
103 103 # it here. This overrides settings from repositories hgrc file. Recent
104 104 # mercurial versions enable largefiles in hgrc on clone from largefile
105 105 # repo.
106 106 if not baseui.hasconfig(b'extensions', b'largefiles'):
107 107 log.debug('Explicitly disable largefiles extension for repo.')
108 108 baseui.setconfig(b'extensions', b'largefiles', b'!')
109 109
110 110 return baseui
111 111
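The expected `repo_config` shape is a flat iterable of `(section, option, value)` triples; a minimal sketch with hypothetical values:

```python
repo_config = [
    ('ui', 'username', 'RhodeCode <noreply@rhodecode.com>'),  # hypothetical
    ('phases', 'publish', 'true'),
]
baseui = make_ui_from_config(repo_config)
# quiet/paginate/message-output are always forced above, and largefiles
# ends up disabled unless the triples configured it explicitly
```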
112 112
113 113 def reraise_safe_exceptions(func):
114 114 """Decorator for converting mercurial exceptions to something neutral."""
115 115
116 116 def wrapper(*args, **kwargs):
117 117 try:
118 118 return func(*args, **kwargs)
119 119 except (Abort, InterventionRequired) as e:
120 120 raise_from_original(exceptions.AbortException(e), e)
121 121 except RepoLookupError as e:
122 122 raise_from_original(exceptions.LookupException(e), e)
123 123 except RequirementError as e:
124 124 raise_from_original(exceptions.RequirementException(e), e)
125 125 except RepoError as e:
126 126 raise_from_original(exceptions.VcsException(e), e)
127 127 except LookupError as e:
128 128 raise_from_original(exceptions.LookupException(e), e)
129 129 except Exception as e:
130 130 if not hasattr(e, '_vcs_kind'):
131 131 log.exception("Unhandled exception in hg remote call")
132 132 raise_from_original(exceptions.UnhandledException(e), e)
133 133
134 134 raise
135 135 return wrapper
136 136
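A sketch of the decorator in use on a hypothetical remote function; any Mercurial-level error crossing the boundary is re-raised as a neutral vcsserver exception:

```python
@reraise_safe_exceptions
def tip_id(factory, wire):  # hypothetical free function for illustration
    repo = factory.repo(wire)
    # a RepoLookupError here surfaces as exceptions.LookupException,
    # anything unknown as exceptions.UnhandledException
    return repo[b'tip'].hex()
```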
137 137
138 138 class MercurialFactory(RepoFactory):
139 139 repo_type = 'hg'
140 140
141 141 def _create_config(self, config, hooks=True):
142 142 if not hooks:
143 143
144 144 hooks_to_clean = {
145 145
146 146 hooks_config.HOOK_REPO_SIZE,
147 147 hooks_config.HOOK_PRE_PULL,
148 148 hooks_config.HOOK_PULL,
149 149
150 150 hooks_config.HOOK_PRE_PUSH,
151 151 # TODO: what about PRETXT, this was disabled in pre 5.0.0
152 152 hooks_config.HOOK_PRETX_PUSH,
153 153
154 154 }
155 155 new_config = []
156 156 for section, option, value in config:
157 157 if section == 'hooks' and option in hooks_to_clean:
158 158 continue
159 159 new_config.append((section, option, value))
160 160 config = new_config
161 161
162 162 baseui = make_ui_from_config(config)
163 163 return baseui
164 164
165 165 def _create_repo(self, wire, create):
166 166 baseui = self._create_config(wire["config"])
167 167 repo = instance(baseui, safe_bytes(wire["path"]), create)
168 168 log.debug('repository created: got HG object: %s', repo)
169 169 return repo
170 170
171 171 def repo(self, wire, create=False):
172 172 """
173 173 Get a repository instance for the given path.
174 174 """
175 175 return self._create_repo(wire, create)
176 176
177 177
178 178 def patch_ui_message_output(baseui):
179 179 baseui.setconfig(b'ui', b'quiet', b'false')
180 180 output = io.BytesIO()
181 181
182 182 def write(data, **unused_kwargs):
183 183 output.write(data)
184 184
185 185 baseui.status = write
186 186 baseui.write = write
187 187 baseui.warn = write
188 188 baseui.debug = write
189 189
190 190 return baseui, output
191 191
192 192
193 193 def get_obfuscated_url(url_obj):
194 194 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
195 195 url_obj.query = obfuscate_qs(url_obj.query)
196 196 obfuscated_uri = str(url_obj)
197 197 return obfuscated_uri
198 198
199 199
200 200 def normalize_url_for_hg(url: str):
201 201 _proto = None
202 202
203 203 if '+' in url[:url.find('://')]:
204 204 _proto = url[0:url.find('+')]
205 205 url = url[url.find('+') + 1:]
206 206 return url, _proto
207 207
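The helper's behaviour on scheme-prefixed URLs, as a quick check:

```python
assert normalize_url_for_hg('svn+http://host/repo') == ('http://host/repo', 'svn')
assert normalize_url_for_hg('http://host/repo') == ('http://host/repo', None)
```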
208 208
209 209 class HgRemote(RemoteBase):
210 210
211 211 def __init__(self, factory):
212 212 self._factory = factory
213 213 self._bulk_methods = {
214 214 "affected_files": self.ctx_files,
215 215 "author": self.ctx_user,
216 216 "branch": self.ctx_branch,
217 217 "children": self.ctx_children,
218 218 "date": self.ctx_date,
219 219 "message": self.ctx_description,
220 220 "parents": self.ctx_parents,
221 221 "status": self.ctx_status,
222 222 "obsolete": self.ctx_obsolete,
223 223 "phase": self.ctx_phase,
224 224 "hidden": self.ctx_hidden,
225 225 "_file_paths": self.ctx_list,
226 226 }
227 227 self._bulk_file_methods = {
228 228 "size": self.fctx_size,
229 229 "data": self.fctx_node_data,
230 230 "flags": self.fctx_flags,
231 231 "is_binary": self.is_binary,
232 232 "md5": self.md5_hash,
233 233 }
234 234
235 235 def _get_ctx(self, repo, ref):
236 236 return get_ctx(repo, ref)
237 237
238 238 @reraise_safe_exceptions
239 239 def discover_hg_version(self):
240 240 from mercurial import util
241 241 return safe_str(util.version())
242 242
243 243 @reraise_safe_exceptions
244 244 def is_empty(self, wire):
245 245 repo = self._factory.repo(wire)
246 246
247 247 try:
248 248 return len(repo) == 0
249 249 except Exception:
250 250 log.exception("failed to read object_store")
251 251 return False
252 252
253 253 @reraise_safe_exceptions
254 254 def bookmarks(self, wire):
255 255 cache_on, context_uid, repo_id = self._cache_on(wire)
256 256 region = self._region(wire)
257 257
258 258 @region.conditional_cache_on_arguments(condition=cache_on)
259 259 def _bookmarks(_context_uid, _repo_id):
260 260 repo = self._factory.repo(wire)
261 261 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
262 262
263 263 return _bookmarks(context_uid, repo_id)
264 264
265 265 @reraise_safe_exceptions
266 266 def branches(self, wire, normal, closed):
267 267 cache_on, context_uid, repo_id = self._cache_on(wire)
268 268 region = self._region(wire)
269 269
270 270 @region.conditional_cache_on_arguments(condition=cache_on)
271 271 def _branches(_context_uid, _repo_id, _normal, _closed):
272 272 repo = self._factory.repo(wire)
273 273 iter_branches = repo.branchmap().iterbranches()
274 274 bt = {}
275 275 for branch_name, _heads, tip_node, is_closed in iter_branches:
276 276 if normal and not is_closed:
277 277 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
278 278 if closed and is_closed:
279 279 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
280 280
281 281 return bt
282 282
283 283 return _branches(context_uid, repo_id, normal, closed)
284 284
285 285 @reraise_safe_exceptions
286 286 def bulk_request(self, wire, commit_id, pre_load):
287 287 cache_on, context_uid, repo_id = self._cache_on(wire)
288 288 region = self._region(wire)
289 289
290 290 @region.conditional_cache_on_arguments(condition=cache_on)
291 291 def _bulk_request(_repo_id, _commit_id, _pre_load):
292 292 result = {}
293 293 for attr in pre_load:
294 294 try:
295 295 method = self._bulk_methods[attr]
296 296 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
297 297 result[attr] = method(wire, commit_id)
298 298 except KeyError as e:
299 299 raise exceptions.VcsException(e)(
300 300 'Unknown bulk attribute: "%s"' % attr)
301 301 return result
302 302
303 303 return _bulk_request(repo_id, commit_id, sorted(pre_load))
304 304
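A hedged call sketch (hypothetical ids and values); each name in `pre_load` is dispatched through `self._bulk_methods`:

```python
# remote.bulk_request(wire, '4f2e1afb...', pre_load=['author', 'branch', 'date'])
# => {'author': ..., 'branch': ..., 'date': ...}
# unknown names raise exceptions.VcsException('Unknown bulk attribute: ...')
```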
305 305 @reraise_safe_exceptions
306 306 def ctx_branch(self, wire, commit_id):
307 307 cache_on, context_uid, repo_id = self._cache_on(wire)
308 308 region = self._region(wire)
309 309
310 310 @region.conditional_cache_on_arguments(condition=cache_on)
311 311 def _ctx_branch(_repo_id, _commit_id):
312 312 repo = self._factory.repo(wire)
313 313 ctx = self._get_ctx(repo, commit_id)
314 314 return ctx.branch()
315 315 return _ctx_branch(repo_id, commit_id)
316 316
317 317 @reraise_safe_exceptions
318 318 def ctx_date(self, wire, commit_id):
319 319 cache_on, context_uid, repo_id = self._cache_on(wire)
320 320 region = self._region(wire)
321 321
322 322 @region.conditional_cache_on_arguments(condition=cache_on)
323 323 def _ctx_date(_repo_id, _commit_id):
324 324 repo = self._factory.repo(wire)
325 325 ctx = self._get_ctx(repo, commit_id)
326 326 return ctx.date()
327 327 return _ctx_date(repo_id, commit_id)
328 328
329 329 @reraise_safe_exceptions
330 330 def ctx_description(self, wire, revision):
331 331 repo = self._factory.repo(wire)
332 332 ctx = self._get_ctx(repo, revision)
333 333 return ctx.description()
334 334
335 335 @reraise_safe_exceptions
336 336 def ctx_files(self, wire, commit_id):
337 337 cache_on, context_uid, repo_id = self._cache_on(wire)
338 338 region = self._region(wire)
339 339
340 340 @region.conditional_cache_on_arguments(condition=cache_on)
341 341 def _ctx_files(_repo_id, _commit_id):
342 342 repo = self._factory.repo(wire)
343 343 ctx = self._get_ctx(repo, commit_id)
344 344 return ctx.files()
345 345
346 346 return _ctx_files(repo_id, commit_id)
347 347
348 348 @reraise_safe_exceptions
349 349 def ctx_list(self, path, revision):
350 350 repo = self._factory.repo(path)
351 351 ctx = self._get_ctx(repo, revision)
352 352 return list(ctx)
353 353
354 354 @reraise_safe_exceptions
355 355 def ctx_parents(self, wire, commit_id):
356 356 cache_on, context_uid, repo_id = self._cache_on(wire)
357 357 region = self._region(wire)
358 358
359 359 @region.conditional_cache_on_arguments(condition=cache_on)
360 360 def _ctx_parents(_repo_id, _commit_id):
361 361 repo = self._factory.repo(wire)
362 362 ctx = self._get_ctx(repo, commit_id)
363 363 return [parent.hex() for parent in ctx.parents()
364 364 if not (parent.hidden() or parent.obsolete())]
365 365
366 366 return _ctx_parents(repo_id, commit_id)
367 367
368 368 @reraise_safe_exceptions
369 369 def ctx_children(self, wire, commit_id):
370 370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 371 region = self._region(wire)
372 372
373 373 @region.conditional_cache_on_arguments(condition=cache_on)
374 374 def _ctx_children(_repo_id, _commit_id):
375 375 repo = self._factory.repo(wire)
376 376 ctx = self._get_ctx(repo, commit_id)
377 377 return [child.hex() for child in ctx.children()
378 378 if not (child.hidden() or child.obsolete())]
379 379
380 380 return _ctx_children(repo_id, commit_id)
381 381
382 382 @reraise_safe_exceptions
383 383 def ctx_phase(self, wire, commit_id):
384 384 cache_on, context_uid, repo_id = self._cache_on(wire)
385 385 region = self._region(wire)
386 386
387 387 @region.conditional_cache_on_arguments(condition=cache_on)
388 388 def _ctx_phase(_context_uid, _repo_id, _commit_id):
389 389 repo = self._factory.repo(wire)
390 390 ctx = self._get_ctx(repo, commit_id)
391 391 # public=0, draft=1, secret=2
392 392 return ctx.phase()
393 393 return _ctx_phase(context_uid, repo_id, commit_id)
394 394
395 395 @reraise_safe_exceptions
396 396 def ctx_obsolete(self, wire, commit_id):
397 397 cache_on, context_uid, repo_id = self._cache_on(wire)
398 398 region = self._region(wire)
399 399
400 400 @region.conditional_cache_on_arguments(condition=cache_on)
401 401 def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
402 402 repo = self._factory.repo(wire)
403 403 ctx = self._get_ctx(repo, commit_id)
404 404 return ctx.obsolete()
405 405 return _ctx_obsolete(context_uid, repo_id, commit_id)
406 406
407 407 @reraise_safe_exceptions
408 408 def ctx_hidden(self, wire, commit_id):
409 409 cache_on, context_uid, repo_id = self._cache_on(wire)
410 410 region = self._region(wire)
411 411
412 412 @region.conditional_cache_on_arguments(condition=cache_on)
413 413 def _ctx_hidden(_context_uid, _repo_id, _commit_id):
414 414 repo = self._factory.repo(wire)
415 415 ctx = self._get_ctx(repo, commit_id)
416 416 return ctx.hidden()
417 417 return _ctx_hidden(context_uid, repo_id, commit_id)
418 418
419 419 @reraise_safe_exceptions
420 420 def ctx_substate(self, wire, revision):
421 421 repo = self._factory.repo(wire)
422 422 ctx = self._get_ctx(repo, revision)
423 423 return ctx.substate
424 424
425 425 @reraise_safe_exceptions
426 426 def ctx_status(self, wire, revision):
427 427 repo = self._factory.repo(wire)
428 428 ctx = self._get_ctx(repo, revision)
429 429 status = repo[ctx.p1().node()].status(other=ctx.node())
430 430 # the status object (an odd, custom named tuple in mercurial) is not
431 431 # correctly serializable; we turn it into a list, as the underlying
432 432 # API expects a list
433 433 return list(status)
434 434
435 435 @reraise_safe_exceptions
436 436 def ctx_user(self, wire, revision):
437 437 repo = self._factory.repo(wire)
438 438 ctx = self._get_ctx(repo, revision)
439 439 return ctx.user()
440 440
441 441 @reraise_safe_exceptions
442 442 def check_url(self, url, config):
443 443 url, _proto = normalize_url_for_hg(url)
444 444 url_obj = url_parser(safe_bytes(url))
445 445
446 446 test_uri = safe_str(url_obj.authinfo()[0])
447 447 authinfo = url_obj.authinfo()[1]
448 448 obfuscated_uri = get_obfuscated_url(url_obj)
449 449 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
450 450
451 451 handlers = []
452 452 if authinfo:
453 453 # create a password manager
454 454 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
455 455 passmgr.add_password(*authinfo)
456 456
457 457 handlers.extend((httpbasicauthhandler(passmgr),
458 458 httpdigestauthhandler(passmgr)))
459 459
460 460 o = urllib.request.build_opener(*handlers)
461 461 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
462 462 ('Accept', 'application/mercurial-0.1')]
463 463
464 464 q = {"cmd": 'between'}
465 465 q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
466 466 qs = '?%s' % urllib.parse.urlencode(q)
467 cu = "{}{}".format(test_uri, qs)
467 cu = f"{test_uri}{qs}"
468 468 req = urllib.request.Request(cu, None, {})
469 469
470 470 try:
471 471 log.debug("Trying to open URL %s", obfuscated_uri)
472 472 resp = o.open(req)
473 473 if resp.code != 200:
474 474 raise exceptions.URLError()('Return Code is not 200')
475 475 except Exception as e:
476 476 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
477 477 # means it cannot be cloned
478 raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e))
478 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
479 479
480 480 # now check if it's a proper hg repo, but don't do it for svn
481 481 try:
482 482 if _proto == 'svn':
483 483 pass
484 484 else:
485 485 # check for pure hg repos
486 486 log.debug(
487 487 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
488 488 ui = make_ui_from_config(config)
489 489 peer_checker = makepeer(ui, safe_bytes(url))
490 490 peer_checker.lookup(b'tip')
491 491 except Exception as e:
492 492 log.warning("URL is not a valid Mercurial repository: %s",
493 493 obfuscated_uri)
494 494 raise exceptions.URLError(e)(
495 495 "url [%s] does not look like an hg repo org_exc: %s"
496 496 % (obfuscated_uri, e))
497 497
498 498 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
499 499 return True
500 500
501 501 @reraise_safe_exceptions
502 502 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_git, opt_ignorews, context):
503 503 repo = self._factory.repo(wire)
504 504
505 505 if file_filter:
506 506 # unpack the file-filter
507 507 repo_path, node_path = file_filter
508 508 match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
509 509 else:
510 510 match_filter = file_filter
511 511 opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
512 512
513 513 try:
514 514 diff_iter = patch.diff(
515 515 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
516 516 return BytesEnvelope(b"".join(diff_iter))
517 517 except RepoLookupError as e:
518 518 raise exceptions.LookupException(e)()
519 519
520 520 @reraise_safe_exceptions
521 521 def node_history(self, wire, revision, path, limit):
522 522 cache_on, context_uid, repo_id = self._cache_on(wire)
523 523 region = self._region(wire)
524 524
525 525 @region.conditional_cache_on_arguments(condition=cache_on)
526 526 def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
527 527 repo = self._factory.repo(wire)
528 528
529 529 ctx = self._get_ctx(repo, revision)
530 530 fctx = ctx.filectx(safe_bytes(path))
531 531
532 532 def history_iter():
533 533 limit_rev = fctx.rev()
534 534 for obj in reversed(list(fctx.filelog())):
535 535 obj = fctx.filectx(obj)
536 536 ctx = obj.changectx()
537 537 if ctx.hidden() or ctx.obsolete():
538 538 continue
539 539
540 540 if limit_rev >= obj.rev():
541 541 yield obj
542 542
543 543 history = []
544 544 for cnt, obj in enumerate(history_iter()):
545 545 if limit and cnt >= limit:
546 546 break
547 547 history.append(hex(obj.node()))
548 548
549 549 return history
550 550 return _node_history(context_uid, repo_id, revision, path, limit)
551 551
552 552 @reraise_safe_exceptions
553 553 def node_history_untill(self, wire, revision, path, limit):
554 554 cache_on, context_uid, repo_id = self._cache_on(wire)
555 555 region = self._region(wire)
556 556
557 557 @region.conditional_cache_on_arguments(condition=cache_on)
558 558 def _node_history_until(_context_uid, _repo_id, _revision, _path, _limit):
559 559 repo = self._factory.repo(wire)
560 560 ctx = self._get_ctx(repo, revision)
561 561 fctx = ctx.filectx(safe_bytes(path))
562 562
563 563 file_log = list(fctx.filelog())
564 564 if limit:
565 565 # Limit to the last n items
566 566 file_log = file_log[-limit:]
567 567
568 568 return [hex(fctx.filectx(cs).node()) for cs in reversed(file_log)]
569 569 return _node_history_until(context_uid, repo_id, revision, path, limit)
570 570
571 571 @reraise_safe_exceptions
572 572 def bulk_file_request(self, wire, commit_id, path, pre_load):
573 573 cache_on, context_uid, repo_id = self._cache_on(wire)
574 574 region = self._region(wire)
575 575
576 576 @region.conditional_cache_on_arguments(condition=cache_on)
577 577 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
578 578 result = {}
579 579 for attr in pre_load:
580 580 try:
581 581 method = self._bulk_file_methods[attr]
582 582 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
583 583 result[attr] = method(wire, _commit_id, _path)
584 584 except KeyError as e:
585 585 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
586 586 return BinaryEnvelope(result)
587 587
588 588 return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
589 589
590 590 @reraise_safe_exceptions
591 591 def fctx_annotate(self, wire, revision, path):
592 592 repo = self._factory.repo(wire)
593 593 ctx = self._get_ctx(repo, revision)
594 594 fctx = ctx.filectx(safe_bytes(path))
595 595
596 596 result = []
597 597 for i, annotate_obj in enumerate(fctx.annotate(), 1):
598 598 ln_no = i
599 599 sha = hex(annotate_obj.fctx.node())
600 600 content = annotate_obj.text
601 601 result.append((ln_no, sha, content))
602 602 return result
603 603
604 604 @reraise_safe_exceptions
605 605 def fctx_node_data(self, wire, revision, path):
606 606 repo = self._factory.repo(wire)
607 607 ctx = self._get_ctx(repo, revision)
608 608 fctx = ctx.filectx(safe_bytes(path))
609 609 return BytesEnvelope(fctx.data())
610 610
611 611 @reraise_safe_exceptions
612 612 def fctx_flags(self, wire, commit_id, path):
613 613 cache_on, context_uid, repo_id = self._cache_on(wire)
614 614 region = self._region(wire)
615 615
616 616 @region.conditional_cache_on_arguments(condition=cache_on)
617 617 def _fctx_flags(_repo_id, _commit_id, _path):
618 618 repo = self._factory.repo(wire)
619 619 ctx = self._get_ctx(repo, commit_id)
620 620 fctx = ctx.filectx(safe_bytes(path))
621 621 return fctx.flags()
622 622
623 623 return _fctx_flags(repo_id, commit_id, path)
624 624
625 625 @reraise_safe_exceptions
626 626 def fctx_size(self, wire, commit_id, path):
627 627 cache_on, context_uid, repo_id = self._cache_on(wire)
628 628 region = self._region(wire)
629 629
630 630 @region.conditional_cache_on_arguments(condition=cache_on)
631 631 def _fctx_size(_repo_id, _revision, _path):
632 632 repo = self._factory.repo(wire)
633 633 ctx = self._get_ctx(repo, commit_id)
634 634 fctx = ctx.filectx(safe_bytes(path))
635 635 return fctx.size()
636 636 return _fctx_size(repo_id, commit_id, path)
637 637
638 638 @reraise_safe_exceptions
639 639 def get_all_commit_ids(self, wire, name):
640 640 cache_on, context_uid, repo_id = self._cache_on(wire)
641 641 region = self._region(wire)
642 642
643 643 @region.conditional_cache_on_arguments(condition=cache_on)
644 644 def _get_all_commit_ids(_context_uid, _repo_id, _name):
645 645 repo = self._factory.repo(wire)
646 646 revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
647 647 return revs
648 648 return _get_all_commit_ids(context_uid, repo_id, name)
649 649
650 650 @reraise_safe_exceptions
651 651 def get_config_value(self, wire, section, name, untrusted=False):
652 652 repo = self._factory.repo(wire)
653 653 return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
654 654
655 655 @reraise_safe_exceptions
656 656 def is_large_file(self, wire, commit_id, path):
657 657 cache_on, context_uid, repo_id = self._cache_on(wire)
658 658 region = self._region(wire)
659 659
660 660 @region.conditional_cache_on_arguments(condition=cache_on)
661 661 def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
662 662 return largefiles.lfutil.isstandin(safe_bytes(path))
663 663
664 664 return _is_large_file(context_uid, repo_id, commit_id, path)
665 665
666 666 @reraise_safe_exceptions
667 667 def is_binary(self, wire, revision, path):
668 668 cache_on, context_uid, repo_id = self._cache_on(wire)
669 669 region = self._region(wire)
670 670
671 671 @region.conditional_cache_on_arguments(condition=cache_on)
672 672 def _is_binary(_repo_id, _sha, _path):
673 673 repo = self._factory.repo(wire)
674 674 ctx = self._get_ctx(repo, revision)
675 675 fctx = ctx.filectx(safe_bytes(path))
676 676 return fctx.isbinary()
677 677
678 678 return _is_binary(repo_id, revision, path)
679 679
680 680 @reraise_safe_exceptions
681 681 def md5_hash(self, wire, revision, path):
682 682 cache_on, context_uid, repo_id = self._cache_on(wire)
683 683 region = self._region(wire)
684 684
685 685 @region.conditional_cache_on_arguments(condition=cache_on)
686 686 def _md5_hash(_repo_id, _sha, _path):
687 687 repo = self._factory.repo(wire)
688 688 ctx = self._get_ctx(repo, revision)
689 689 fctx = ctx.filectx(safe_bytes(path))
690 690 return hashlib.md5(fctx.data()).hexdigest()
691 691
692 692 return _md5_hash(repo_id, revision, path)
693 693
694 694 @reraise_safe_exceptions
695 695 def in_largefiles_store(self, wire, sha):
696 696 repo = self._factory.repo(wire)
697 697 return largefiles.lfutil.instore(repo, sha)
698 698
699 699 @reraise_safe_exceptions
700 700 def in_user_cache(self, wire, sha):
701 701 repo = self._factory.repo(wire)
702 702 return largefiles.lfutil.inusercache(repo.ui, sha)
703 703
704 704 @reraise_safe_exceptions
705 705 def store_path(self, wire, sha):
706 706 repo = self._factory.repo(wire)
707 707 return largefiles.lfutil.storepath(repo, sha)
708 708
709 709 @reraise_safe_exceptions
710 710 def link(self, wire, sha, path):
711 711 repo = self._factory.repo(wire)
712 712 largefiles.lfutil.link(
713 713 largefiles.lfutil.usercachepath(repo.ui, sha), path)
714 714
715 715 @reraise_safe_exceptions
716 716 def localrepository(self, wire, create=False):
717 717 self._factory.repo(wire, create=create)
718 718
719 719 @reraise_safe_exceptions
720 720 def lookup(self, wire, revision, both):
721 721 cache_on, context_uid, repo_id = self._cache_on(wire)
722 722 region = self._region(wire)
723 723
724 724 @region.conditional_cache_on_arguments(condition=cache_on)
725 725 def _lookup(_context_uid, _repo_id, _revision, _both):
726 726 repo = self._factory.repo(wire)
727 727 rev = _revision
728 728 if isinstance(rev, int):
729 729 # NOTE(marcink):
730 730 # since Mercurial doesn't support negative indexes properly
731 731 # we need to shift accordingly by one to get a proper index, e.g.
732 732 # repo[-1] => repo[-2]
733 733 # repo[0] => repo[-1]
734 734 if rev <= 0:
735 735 rev -= 1
736 736 try:
737 737 ctx = self._get_ctx(repo, rev)
738 738 except (TypeError, RepoLookupError, binascii.Error) as e:
739 739 e._org_exc_tb = traceback.format_exc()
740 740 raise exceptions.LookupException(e)(rev)
741 741 except LookupError as e:
742 742 e._org_exc_tb = traceback.format_exc()
743 743 raise exceptions.LookupException(e)(e.name)
744 744
745 745 if not both:
746 746 return ctx.hex()
747 747
748 748 ctx = repo[ctx.hex()]
749 749 return ctx.hex(), ctx.rev()
750 750
751 751 return _lookup(context_uid, repo_id, revision, both)
752 752
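The negative-index shift inside `_lookup` above, isolated as a one-liner for clarity (the mapping is the one given in the NOTE):

```python
def shift_rev(rev: int) -> int:
    # repo[0] -> repo[-1], repo[-1] -> repo[-2]; positive revs pass through
    return rev - 1 if rev <= 0 else rev
```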
753 753 @reraise_safe_exceptions
754 754 def sync_push(self, wire, url):
755 755 if not self.check_url(url, wire['config']):
756 756 return
757 757
758 758 repo = self._factory.repo(wire)
759 759
760 760 # Disable any prompts for this repo
761 761 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
762 762
763 763 bookmarks = list(dict(repo._bookmarks).keys())
764 764 remote = peer(repo, {}, safe_bytes(url))
765 765 # Disable any prompts for this remote
766 766 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
767 767
768 768 return exchange.push(
769 769 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
770 770
771 771 @reraise_safe_exceptions
772 772 def revision(self, wire, rev):
773 773 repo = self._factory.repo(wire)
774 774 ctx = self._get_ctx(repo, rev)
775 775 return ctx.rev()
776 776
777 777 @reraise_safe_exceptions
778 778 def rev_range(self, wire, commit_filter):
779 779 cache_on, context_uid, repo_id = self._cache_on(wire)
780 780 region = self._region(wire)
781 781
782 782 @region.conditional_cache_on_arguments(condition=cache_on)
783 783 def _rev_range(_context_uid, _repo_id, _filter):
784 784 repo = self._factory.repo(wire)
785 785 revisions = [
786 786 ascii_str(repo[rev].hex())
787 787 for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
788 788 ]
789 789 return revisions
790 790
791 791 return _rev_range(context_uid, repo_id, sorted(commit_filter))
792 792
793 793 @reraise_safe_exceptions
794 794 def rev_range_hash(self, wire, node):
795 795 repo = self._factory.repo(wire)
796 796
797 797 def get_revs(repo, rev_opt):
798 798 if rev_opt:
799 799 revs = revrange(repo, rev_opt)
800 800 if len(revs) == 0:
801 801 return (nullrev, nullrev)
802 802 return max(revs), min(revs)
803 803 else:
804 804 return len(repo) - 1, 0
805 805
806 806 stop, start = get_revs(repo, [node + ':'])
807 807 revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
808 808 return revs
809 809
810 810 @reraise_safe_exceptions
811 811 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
812 812 org_path = safe_bytes(wire["path"])
813 813 other_path = safe_bytes(kwargs.pop('other_path', ''))
814 814
815 815 # case when we want to compare two independent repositories
816 816 if other_path and other_path != wire["path"]:
817 817 baseui = self._factory._create_config(wire["config"])
818 818 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
819 819 else:
820 820 repo = self._factory.repo(wire)
821 821 return list(repo.revs(rev_spec, *args))
822 822
823 823 @reraise_safe_exceptions
824 824 def verify(self, wire):
825 825 repo = self._factory.repo(wire)
826 826 baseui = self._factory._create_config(wire['config'])
827 827
828 828 baseui, output = patch_ui_message_output(baseui)
829 829
830 830 repo.ui = baseui
831 831 verify.verify(repo)
832 832 return output.getvalue()
833 833
834 834 @reraise_safe_exceptions
835 835 def hg_update_cache(self, wire):
836 836 repo = self._factory.repo(wire)
837 837 baseui = self._factory._create_config(wire['config'])
838 838 baseui, output = patch_ui_message_output(baseui)
839 839
840 840 repo.ui = baseui
841 841 with repo.wlock(), repo.lock():
842 842 repo.updatecaches(full=True)
843 843
844 844 return output.getvalue()
845 845
846 846 @reraise_safe_exceptions
847 847 def hg_rebuild_fn_cache(self, wire):
848 848 repo = self._factory.repo(wire)
849 849 baseui = self._factory._create_config(wire['config'])
850 850 baseui, output = patch_ui_message_output(baseui)
851 851
852 852 repo.ui = baseui
853 853
854 854 repair.rebuildfncache(baseui, repo)
855 855
856 856 return output.getvalue()
857 857
858 858 @reraise_safe_exceptions
859 859 def tags(self, wire):
860 860 cache_on, context_uid, repo_id = self._cache_on(wire)
861 861 region = self._region(wire)
862 862
863 863 @region.conditional_cache_on_arguments(condition=cache_on)
864 864 def _tags(_context_uid, _repo_id):
865 865 repo = self._factory.repo(wire)
866 866 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
867 867
868 868 return _tags(context_uid, repo_id)
869 869
870 870 @reraise_safe_exceptions
871 871 def update(self, wire, node='', clean=False):
872 872 repo = self._factory.repo(wire)
873 873 baseui = self._factory._create_config(wire['config'])
874 874 node = safe_bytes(node)
875 875
876 876 commands.update(baseui, repo, node=node, clean=clean)
877 877
878 878 @reraise_safe_exceptions
879 879 def identify(self, wire):
880 880 repo = self._factory.repo(wire)
881 881 baseui = self._factory._create_config(wire['config'])
882 882 output = io.BytesIO()
883 883 baseui.write = output.write
884 884 # This is required to get a full node id
885 885 baseui.debugflag = True
886 886 commands.identify(baseui, repo, id=True)
887 887
888 888 return output.getvalue()
889 889
890 890 @reraise_safe_exceptions
891 891 def heads(self, wire, branch=None):
892 892 repo = self._factory.repo(wire)
893 893 baseui = self._factory._create_config(wire['config'])
894 894 output = io.BytesIO()
895 895
896 896 def write(data, **unused_kwargs):
897 897 output.write(data)
898 898
899 899 baseui.write = write
900 900 if branch:
901 901 args = [safe_bytes(branch)]
902 902 else:
903 903 args = []
904 904 commands.heads(baseui, repo, template=b'{node} ', *args)
905 905
906 906 return output.getvalue()
907 907
908 908 @reraise_safe_exceptions
909 909 def ancestor(self, wire, revision1, revision2):
910 910 repo = self._factory.repo(wire)
911 911 changelog = repo.changelog
912 912 lookup = repo.lookup
913 913 a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
914 914 return hex(a)
915 915
916 916 @reraise_safe_exceptions
917 917 def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
918 918 baseui = self._factory._create_config(wire["config"], hooks=hooks)
919 919 clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
920 920
921 921 @reraise_safe_exceptions
922 922 def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
923 923
924 924 repo = self._factory.repo(wire)
925 925 baseui = self._factory._create_config(wire['config'])
926 926 publishing = baseui.configbool(b'phases', b'publish')
927 927
928 928 def _filectxfn(_repo, ctx, path: bytes):
929 929 """
930 930 Marks given path as added/changed/removed in a given _repo. This is
931 931 for internal mercurial commit function.
932 932 """
933 933
934 934 # check if this path is removed
935 935 if safe_str(path) in removed:
936 936 # returning None is a way to mark node for removal
937 937 return None
938 938
939 939 # check if this path is added
940 940 for node in updated:
941 941 if safe_bytes(node['path']) == path:
942 942 return memfilectx(
943 943 _repo,
944 944 changectx=ctx,
945 945 path=safe_bytes(node['path']),
946 946 data=safe_bytes(node['content']),
947 947 islink=False,
948 948 isexec=bool(node['mode'] & stat.S_IXUSR),
949 949 copysource=False)
950 950 abort_exc = exceptions.AbortException()
951 951 raise abort_exc(f"Given path hasn't been marked as added, changed or removed ({path})")
952 952
953 953 if publishing:
954 954 new_commit_phase = b'public'
955 955 else:
956 956 new_commit_phase = b'draft'
957 957 with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
958 958 kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
959 959 commit_ctx = memctx(
960 960 repo=repo,
961 961 parents=parents,
962 962 text=safe_bytes(message),
963 963 files=[safe_bytes(x) for x in files],
964 964 filectxfn=_filectxfn,
965 965 user=safe_bytes(user),
966 966 date=(commit_time, commit_timezone),
967 967 extra=kwargs)
968 968
969 969 n = repo.commitctx(commit_ctx)
970 970 new_id = hex(n)
971 971
972 972 return new_id
973 973
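A sketch of the payload shapes `_filectxfn` consumes (all values hypothetical):

```python
updated = [
    # each node carries a path, content, and a posix mode used for the exec bit
    {'path': 'docs/readme.rst', 'content': 'new text', 'mode': 0o100644},
]
removed = ['old/obsolete.txt']
# paths in `removed`          -> _filectxfn returns None (node deleted)
# paths in an `updated` entry -> wrapped into a memfilectx
# anything else               -> AbortException
```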
974 974 @reraise_safe_exceptions
975 975 def pull(self, wire, url, commit_ids=None):
976 976 repo = self._factory.repo(wire)
977 977 # Disable any prompts for this repo
978 978 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
979 979
980 980 remote = peer(repo, {}, safe_bytes(url))
981 981 # Disable any prompts for this remote
982 982 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
983 983
984 984 if commit_ids:
985 985 commit_ids = [bin(commit_id) for commit_id in commit_ids]
986 986
987 987 return exchange.pull(
988 988 repo, remote, heads=commit_ids, force=None).cgresult
989 989
990 990 @reraise_safe_exceptions
991 991 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
992 992 repo = self._factory.repo(wire)
993 993 baseui = self._factory._create_config(wire['config'], hooks=hooks)
994 994
995 995 source = safe_bytes(source)
996 996
997 997 # Mercurial internally has a lot of logic that checks ONLY whether an
998 998 # option is defined; we therefore pass only the options that are set
999 999 opts = {}
1000 1000
1001 1001 if bookmark:
1002 1002 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
1003 1003 if isinstance(bookmark, list) else safe_bytes(bookmark)
1004 1004
1005 1005 if branch:
1006 1006 opts['branch'] = [safe_bytes(x) for x in branch] \
1007 1007 if isinstance(branch, list) else safe_bytes(branch)
1008 1008
1009 1009 if revision:
1010 1010 opts['rev'] = [safe_bytes(x) for x in revision] \
1011 1011 if isinstance(revision, list) else safe_bytes(revision)
1012 1012
1013 1013 commands.pull(baseui, repo, source, **opts)
1014 1014
1015 1015 @reraise_safe_exceptions
1016 1016 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
1017 1017 repo = self._factory.repo(wire)
1018 1018 baseui = self._factory._create_config(wire['config'], hooks=hooks)
1019 1019
1020 1020 revisions = [safe_bytes(x) for x in revisions] \
1021 1021 if isinstance(revisions, list) else safe_bytes(revisions)
1022 1022
1023 1023 commands.push(baseui, repo, safe_bytes(dest_path),
1024 1024 rev=revisions,
1025 1025 new_branch=push_branches)
1026 1026
1027 1027 @reraise_safe_exceptions
1028 1028 def strip(self, wire, revision, update, backup):
1029 1029 repo = self._factory.repo(wire)
1030 1030 ctx = self._get_ctx(repo, revision)
1031 1031 hgext_strip.strip(
1032 1032 repo.baseui, repo, ctx.node(), update=update, backup=backup)
1033 1033
1034 1034 @reraise_safe_exceptions
1035 1035 def get_unresolved_files(self, wire):
1036 1036 repo = self._factory.repo(wire)
1037 1037
1038 1038 log.debug('Calculating unresolved files for repo: %s', repo)
1039 1039 output = io.BytesIO()
1040 1040
1041 1041 def write(data, **unused_kwargs):
1042 1042 output.write(data)
1043 1043
1044 1044 baseui = self._factory._create_config(wire['config'])
1045 1045 baseui.write = write
1046 1046
1047 1047 commands.resolve(baseui, repo, list=True)
1048 1048 unresolved = output.getvalue().splitlines(0)
1049 1049 return unresolved
1050 1050
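The write-override trick above is a reusable pattern for capturing Mercurial command output without a terminal; distilled into a hedged sketch (assumes a configured baseui and repo, as in the method body):

    output = io.BytesIO()
    baseui.write = lambda data, **kwargs: output.write(data)  # capture ui output
    commands.resolve(baseui, repo, list=True)                 # writes into the buffer
    unresolved = output.getvalue().splitlines()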
1051 1051 @reraise_safe_exceptions
1052 1052 def merge(self, wire, revision):
1053 1053 repo = self._factory.repo(wire)
1054 1054 baseui = self._factory._create_config(wire['config'])
1055 1055 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1056 1056
1057 1057 # If sub repositories are used, Mercurial prompts the user in case
1058 1058 # of merge conflicts or different sub repository sources. By setting
1059 1059 # the interactive flag to `False`, Mercurial doesn't prompt the
1060 1060 # user but instead uses a default value.
1061 1061 repo.ui.setconfig(b'ui', b'interactive', False)
1062 1062 commands.merge(baseui, repo, rev=safe_bytes(revision))
1063 1063
1064 1064 @reraise_safe_exceptions
1065 1065 def merge_state(self, wire):
1066 1066 repo = self._factory.repo(wire)
1067 1067 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1068 1068
1069 1069 # If sub repositories are used, Mercurial prompts the user in case
1070 1070 # of merge conflicts or different sub repository sources. By setting
1071 1071 # the interactive flag to `False`, Mercurial doesn't prompt the
1072 1072 # user but instead uses a default value.
1073 1073 repo.ui.setconfig(b'ui', b'interactive', False)
1074 1074 ms = hg_merge.mergestate(repo)
1075 1075 return [x for x in ms.unresolved()]
1076 1076
1077 1077 @reraise_safe_exceptions
1078 1078 def commit(self, wire, message, username, close_branch=False):
1079 1079 repo = self._factory.repo(wire)
1080 1080 baseui = self._factory._create_config(wire['config'])
1081 1081 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1082 1082 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1083 1083
1084 1084 @reraise_safe_exceptions
1085 1085 def rebase(self, wire, source='', dest='', abort=False):
1086 1086 repo = self._factory.repo(wire)
1087 1087 baseui = self._factory._create_config(wire['config'])
1088 1088 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1089 1089 # If sub repositories are used, Mercurial prompts the user in case
1090 1090 # of merge conflicts or different sub repository sources. By setting
1091 1091 # the interactive flag to `False`, Mercurial doesn't prompt the
1092 1092 # user but instead uses a default value.
1093 1093 repo.ui.setconfig(b'ui', b'interactive', False)
1094 1094
1095 1095 rebase.rebase(baseui, repo, base=safe_bytes(source or ''), dest=safe_bytes(dest or ''),
1096 1096 abort=abort, keep=not abort)
1097 1097
1098 1098 @reraise_safe_exceptions
1099 1099 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
1100 1100 repo = self._factory.repo(wire)
1101 1101 ctx = self._get_ctx(repo, revision)
1102 1102 node = ctx.node()
1103 1103
1104 1104 date = (tag_time, tag_timezone)
1105 1105 try:
1106 1106 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
1107 1107 except Abort as e:
1108 1108 log.exception("Tag operation aborted")
1109 1109 # Exception can contain unicode which we convert
1110 1110 raise exceptions.AbortException(e)(repr(e))
1111 1111
1112 1112 @reraise_safe_exceptions
1113 1113 def bookmark(self, wire, bookmark, revision=''):
1114 1114 repo = self._factory.repo(wire)
1115 1115 baseui = self._factory._create_config(wire['config'])
1116 1116 revision = revision or ''
1117 1117 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1118 1118
1119 1119 @reraise_safe_exceptions
1120 1120 def install_hooks(self, wire, force=False):
1121 1121 # we don't need any special hooks for Mercurial
1122 1122 pass
1123 1123
1124 1124 @reraise_safe_exceptions
1125 1125 def get_hooks_info(self, wire):
1126 1126 return {
1127 1127 'pre_version': vcsserver.__version__,
1128 1128 'post_version': vcsserver.__version__,
1129 1129 }
1130 1130
1131 1131 @reraise_safe_exceptions
1132 1132 def set_head_ref(self, wire, head_name):
1133 1133 pass
1134 1134
1135 1135 @reraise_safe_exceptions
1136 1136 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1137 1137 archive_dir_name, commit_id, cache_config):
1138 1138
1139 1139 def file_walker(_commit_id, path):
1140 1140 repo = self._factory.repo(wire)
1141 1141 ctx = repo[_commit_id]
1142 1142 is_root = path in ['', '/']
1143 1143 if is_root:
1144 1144 matcher = alwaysmatcher(badfn=None)
1145 1145 else:
1146 1146 matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None)
1147 1147 file_iter = ctx.manifest().walk(matcher)
1148 1148
1149 1149 for fn in file_iter:
1150 1150 file_path = fn
1151 1151 flags = ctx.flags(fn)
1152 1152 mode = 0o755 if b'x' in flags else 0o644
1153 1153 is_link = b'l' in flags
1154 1154
1155 1155 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1156 1156
1157 1157 return store_archive_in_cache(
1158 1158 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1159 1159
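For orientation, a hedged sketch of the generator contract: file_walker(commit_id, path) yields ArchiveNode entries that store_archive_in_cache is expected to consume; the field names below are assumptions read off the constructor call above, and write_to_archive is a hypothetical consumer:

    for node in file_walker(commit_id, ''):  # '' (or '/') walks the repo root
        content = node.data()  # ctx[fn].data is passed uncalled, so call it here
        write_to_archive(node.path, node.mode, node.is_link, content)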
@@ -1,563 +1,563 b''
1 1 """
2 2 Module provides a class that wraps communication over subprocess.Popen
3 3 input, output, error streams into a meaningful, non-blocking, concurrent
4 4 stream processor exposing the output data as an iterator fit to be a
5 5 return value passed by a WSGI application to a WSGI server per PEP 3333.
6 6
7 7 Copyright (c) 2011 Daniel Dotsenko <dotsa[at]hotmail.com>
8 8
9 9 This file is part of git_http_backend.py Project.
10 10
11 11 git_http_backend.py Project is free software: you can redistribute it and/or
12 12 modify it under the terms of the GNU Lesser General Public License as
13 13 published by the Free Software Foundation, either version 2.1 of the License,
14 14 or (at your option) any later version.
15 15
16 16 git_http_backend.py Project is distributed in the hope that it will be useful,
17 17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 19 GNU Lesser General Public License for more details.
20 20
21 21 You should have received a copy of the GNU Lesser General Public License
22 22 along with git_http_backend.py Project.
23 23 If not, see <http://www.gnu.org/licenses/>.
24 24 """
25 25 import os
26 26 import collections
27 27 import logging
28 28 import subprocess
29 29 import threading
30 30
31 31 from vcsserver.str_utils import safe_str
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class StreamFeeder(threading.Thread):
37 37 """
38 38 Normal writing into a pipe-like object blocks once the buffer is filled.
39 39 This thread feeds data from a file-like object into a pipe
40 40 without blocking the main thread.
41 41 We close the input pipe once the end of the source stream is reached.
42 42 """
43 43
44 44 def __init__(self, source):
45 super(StreamFeeder, self).__init__()
45 super().__init__()
46 46 self.daemon = True
47 47 filelike = False
48 self.bytes = bytes()
49 if type(source) in (type(''), bytes, bytearray): # string-like
48 self.bytes = b''
49 if type(source) in (str, bytes, bytearray): # string-like
50 50 self.bytes = bytes(source)
51 51 else: # can be either file pointer or file-like
52 52 if isinstance(source, int): # file pointer it is
53 53 # converting file descriptor (int) stdin into file-like
54 54 source = os.fdopen(source, 'rb', 16384)
55 55 # let's see if source is file-like by now
56 56 filelike = hasattr(source, 'read')
57 57 if not filelike and not self.bytes:
58 58 raise TypeError("StreamFeeder's source object must be a readable "
59 59 "file-like, a file descriptor, or a string-like.")
60 60 self.source = source
61 61 self.readiface, self.writeiface = os.pipe()
62 62
63 63 def run(self):
64 64 writer = self.writeiface
65 65 try:
66 66 if self.bytes:
67 67 os.write(writer, self.bytes)
68 68 else:
69 69 s = self.source
70 70
71 71 while 1:
72 72 _bytes = s.read(4096)
73 73 if not _bytes:
74 74 break
75 75 os.write(writer, _bytes)
76 76
77 77 finally:
78 78 os.close(writer)
79 79
80 80 @property
81 81 def output(self):
82 82 return self.readiface
83 83
84 84
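A minimal usage sketch of StreamFeeder, assuming it runs inside this module (so os is already imported): the daemon thread pushes the bytes into a pipe while the caller reads the other end without blocking on the write side:

    feeder = StreamFeeder(b'payload for the subprocess stdin')
    feeder.start()
    read_fd = feeder.output          # read end of the pipe (a file descriptor)
    data = os.read(read_fd, 4096)    # b'payload for the subprocess stdin'
    feeder.join()
    os.close(read_fd)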
85 85 class InputStreamChunker(threading.Thread):
86 86 def __init__(self, source, target, buffer_size, chunk_size):
87 87
88 super(InputStreamChunker, self).__init__()
88 super().__init__()
89 89
90 90 self.daemon = True # die die die.
91 91
92 92 self.source = source
93 93 self.target = target
94 94 self.chunk_count_max = int(buffer_size / chunk_size) + 1
95 95 self.chunk_size = chunk_size
96 96
97 97 self.data_added = threading.Event()
98 98 self.data_added.clear()
99 99
100 100 self.keep_reading = threading.Event()
101 101 self.keep_reading.set()
102 102
103 103 self.EOF = threading.Event()
104 104 self.EOF.clear()
105 105
106 106 self.go = threading.Event()
107 107 self.go.set()
108 108
109 109 def stop(self):
110 110 self.go.clear()
111 111 self.EOF.set()
112 112 try:
113 113 # this is not proper, but is done to force the reader thread to let
114 114 # go of the input because, if successful, .close() will send EOF
115 115 # down the pipe.
116 116 self.source.close()
117 117 except Exception:
118 118 pass
119 119
120 120 def run(self):
121 121 s = self.source
122 122 t = self.target
123 123 cs = self.chunk_size
124 124 chunk_count_max = self.chunk_count_max
125 125 keep_reading = self.keep_reading
126 126 da = self.data_added
127 127 go = self.go
128 128
129 129 try:
130 130 b = s.read(cs)
131 131 except ValueError:
132 132 b = ''
133 133
134 134 timeout_input = 20
135 135 while b and go.is_set():
136 136 if len(t) > chunk_count_max:
137 137 keep_reading.clear()
138 138 keep_reading.wait(timeout_input)
139 139 if len(t) > chunk_count_max + timeout_input:
140 140 log.error("Timed out while waiting for input from subprocess.")
141 141 os._exit(-1) # this will cause the worker to recycle itself
142 142
143 143 t.append(b)
144 144 da.set()
145 145
146 146 try:
147 147 b = s.read(cs)
148 148 except ValueError: # probably "I/O operation on closed file"
149 149 b = ''
150 150
151 151 self.EOF.set()
152 152 da.set() # for cases when done but there was no input.
153 153
154 154
155 155 class BufferedGenerator(object):
156 156 """
157 157 Class behaves as a non-blocking, buffered pipe reader.
158 158 Reads chunks of data (through a thread)
159 159 from a blocking pipe, and attaches these to an array (Deque) of chunks.
160 160 Reading is halted in the thread when max chunks is internally buffered.
161 161 The __next__() method may operate in blocking or non-blocking fashion:
162 162 it yields '' if no data is ready to be sent,
163 163 or does not return until there is some data to send.
164 164 When we get EOF from the underlying source pipe we set a marker to raise
165 165 StopIteration after the last chunk of data is yielded.
166 166 """
167 167
168 168 def __init__(self, name, source, buffer_size=65536, chunk_size=4096,
169 169 starting_values=None, bottomless=False):
170 170 starting_values = starting_values or []
171 171 self.name = name
172 172 self.buffer_size = buffer_size
173 173 self.chunk_size = chunk_size
174 174
175 175 if bottomless:
176 176 maxlen = int(buffer_size / chunk_size)
177 177 else:
178 178 maxlen = None
179 179
180 180 self.data_queue = collections.deque(starting_values, maxlen)
181 181 self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size)
182 182 if starting_values:
183 183 self.worker.data_added.set()
184 184 self.worker.start()
185 185
186 186 ####################
187 187 # Generator's methods
188 188 ####################
189 189 def __str__(self):
190 190 return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})'
191 191
192 192 def __iter__(self):
193 193 return self
194 194
195 195 def __next__(self):
196 196
197 197 while not self.length and not self.worker.EOF.is_set():
198 198 self.worker.data_added.clear()
199 199 self.worker.data_added.wait(0.2)
200 200
201 201 if self.length:
202 202 self.worker.keep_reading.set()
203 203 return bytes(self.data_queue.popleft())
204 204 elif self.worker.EOF.is_set():
205 205 raise StopIteration
206 206
207 207 def throw(self, exc_type, value=None, traceback=None):
208 208 if not self.worker.EOF.is_set():
209 209 raise exc_type(value)
210 210
211 211 def start(self):
212 212 self.worker.start()
213 213
214 214 def stop(self):
215 215 self.worker.stop()
216 216
217 217 def close(self):
218 218 try:
219 219 self.worker.stop()
220 220 self.throw(GeneratorExit)
221 221 except (GeneratorExit, StopIteration):
222 222 pass
223 223
224 224 ####################
225 225 # Threaded reader's infrastructure.
226 226 ####################
227 227 @property
228 228 def input(self):
229 229 return self.worker.w
230 230
231 231 @property
232 232 def data_added_event(self):
233 233 return self.worker.data_added
234 234
235 235 @property
236 236 def data_added(self):
237 237 return self.worker.data_added.is_set()
238 238
239 239 @property
240 240 def reading_paused(self):
241 241 return not self.worker.keep_reading.is_set()
242 242
243 243 @property
244 244 def done_reading_event(self):
245 245 """
246 246 Done_reading does not mean that the iterator's buffer is empty.
247 247 The iterator might be done reading from the underlying source, but the read
248 248 chunks might still be available for serving through the __next__() method.
249 249
250 250 :returns: An Event class instance.
251 251 """
252 252 return self.worker.EOF
253 253
254 254 @property
255 255 def done_reading(self):
256 256 """
257 257 Done_reading does not mean that the iterator's buffer is empty.
258 258 The iterator might be done reading from the underlying source, but the read
259 259 chunks might still be available for serving through the __next__() method.
260 260
261 261 :returns: A bool value.
262 262 """
263 263 return self.worker.EOF.is_set()
264 264
265 265 @property
266 266 def length(self):
267 267 """
268 268 returns int.
269 269
270 270 This is the length of the queue of chunks, not the length of
271 271 the combined contents in those chunks.
272 272
273 273 __len__() cannot be meaningfully implemented because this
274 274 reader is just flying through a bottomless pit of content and
275 275 can only know the length of what it already saw.
276 276
277 277 If __len__() returned a value, a WSGI server per PEP 3333 would
278 278 set the response's length to it. In order not to
279 279 confuse PEP 3333 WSGI servers, we do not implement __len__
280 280 at all.
281 281 """
282 282 return len(self.data_queue)
283 283
284 284 def prepend(self, x):
285 285 self.data_queue.appendleft(x)
286 286
287 287 def append(self, x):
288 288 self.data_queue.append(x)
289 289
290 290 def extend(self, o):
291 291 self.data_queue.extend(o)
292 292
293 293 def __getitem__(self, i):
294 294 return self.data_queue[i]
295 295
296 296
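A hedged usage sketch of BufferedGenerator: wrap the readable end of a pipe, then drain the object as an iterator of byte chunks; EOF on the source is what ends the iteration:

    r_fd, w_fd = os.pipe()
    source = os.fdopen(r_fd, 'rb')
    gen = BufferedGenerator('demo', source, buffer_size=65536, chunk_size=4096)
    os.write(w_fd, b'chunked payload')
    os.close(w_fd)                   # EOF lets the worker thread finish
    assert b''.join(gen) == b'chunked payload'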
297 297 class SubprocessIOChunker(object):
298 298 """
299 299 Processor class wrapping handling of subprocess IO.
300 300
301 301 .. important::
302 302
303 303 Watch out for the method `__del__` on this class. If this object
304 304 is deleted, it will kill the subprocess, so avoid returning
305 305 the `output` attribute or using it as in the following
306 306 example::
307 307
308 308 # `args` expected to run a program that produces a lot of output
309 309 output = ''.join(SubprocessIOChunker(
310 310 args, shell=False, inputstream=inputstream, env=environ).output)
311 311
312 312 # `output` will not contain all the data, because the __del__ method
313 313 # has already killed the subprocess in this case before all output
314 314 # has been consumed.
315 315
316 316
317 317
318 318 In a way, this is a "communicate()" replacement with a twist.
319 319
320 320 - We are multithreaded. Writing in and reading out/err are all separate threads.
321 321 - We support concurrent (in and out) stream processing.
322 322 - The output is not a stream. It's a queue of read (bytes, not str)
323 323 chunks. The object behaves as an iterable; you can use "for chunk in obj:".
324 324 - We are non-blocking in more respects than communicate()
325 325 (reading from subprocess out pauses when internal buffer is full, but
326 326 does not block the parent calling code. On the flip side, reading from
327 327 slow-yielding subprocess may block the iteration until data shows up. This
328 328 does not block the inpipe reading occurring in a parallel thread.)
329 329
330 330 The purpose of the object is to allow us to wrap subprocess interactions into
331 331 an iterable that can be passed to a WSGI server as the application's return
332 332 value. Because of stream-processing-ability, WSGI does not have to read ALL
333 333 of the subprocess's output and buffer it, before handing it to WSGI server for
334 334 HTTP response. Instead, the class initializer reads just a bit of the stream
335 335 to figure out if an error occurred or is likely to occur and, if not, just hands the
336 336 further iteration over subprocess output to the server for completion of HTTP
337 337 response.
338 338
339 339 The real or perceived subprocess error is trapped and raised as one of
340 340 the OSError family of exceptions.
341 341
342 342 Example usage:
343 343 # try:
344 344 # answer = SubprocessIOChunker(
345 345 # cmd,
346 346 # input,
347 347 # buffer_size = 65536,
348 348 # chunk_size = 4096
349 349 # )
350 350 # except (OSError) as e:
351 351 # print(str(e))
352 352 # raise e
353 353 #
354 354 # return answer
355 355
356 356
357 357 """
358 358
359 359 # TODO: johbo: This is used to make sure that the open end of the PIPE
360 360 # is closed in the end. It would be way better to wrap this into an
361 361 # object, so that it is closed automatically once it is consumed or
362 362 # something similar.
363 363 _close_input_fd = None
364 364
365 365 _closed = False
366 366 _stdout = None
367 367 _stderr = None
368 368
369 369 def __init__(self, cmd, input_stream=None, buffer_size=65536,
370 370 chunk_size=4096, starting_values=None, fail_on_stderr=True,
371 371 fail_on_return_code=True, **kwargs):
372 372 """
373 373 Initializes SubprocessIOChunker
374 374
375 375 :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
376 376 :param input_stream: (Default: None) A file-like, string, or file pointer.
377 377 :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
378 378 :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
379 379 :param starting_values: (Default: []) An array of strings to put in front of the output queue.
380 380 :param fail_on_stderr: (Default: True) Whether to raise an exception in
381 381 case something is written to stderr.
382 382 :param fail_on_return_code: (Default: True) Whether to raise an
383 383 exception if the return code is not 0.
384 384 """
385 385
386 386 kwargs['shell'] = kwargs.get('shell', True)
387 387
388 388 starting_values = starting_values or []
389 389 if input_stream:
390 390 input_streamer = StreamFeeder(input_stream)
391 391 input_streamer.start()
392 392 input_stream = input_streamer.output
393 393 self._close_input_fd = input_stream
394 394
395 395 self._fail_on_stderr = fail_on_stderr
396 396 self._fail_on_return_code = fail_on_return_code
397 397 self.cmd = cmd
398 398
399 399 _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
400 400 **kwargs)
401 401 self.process = _p
402 402
403 403 bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values)
404 404 bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True)
405 405
406 406 while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
407 407 # doing this until we reach either end of file, or end of buffer.
408 408 bg_out.data_added_event.wait(0.2)
409 409 bg_out.data_added_event.clear()
410 410
411 411 # at this point it's still ambiguous if we are done reading or just full buffer.
412 412 # Either way, if error (returned by ended process, or implied based on
413 413 # presence of stuff in stderr output) we error out.
414 414 # Else, we are happy.
415 415 return_code = _p.poll()
416 416 ret_code_ok = return_code in [None, 0]
417 417 ret_code_fail = return_code is not None and return_code != 0
418 418 if (
419 419 (ret_code_fail and fail_on_return_code) or
420 420 (ret_code_ok and fail_on_stderr and bg_err.length)
421 421 ):
422 422
423 423 try:
424 424 _p.terminate()
425 425 except Exception:
426 426 pass
427 427
428 428 bg_out.stop()
429 429 out = b''.join(bg_out)
430 430 self._stdout = out
431 431
432 432 bg_err.stop()
433 433 err = b''.join(bg_err)
434 434 self._stderr = err
435 435
436 436 # code from https://github.com/schacon/grack/pull/7
437 437 if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '):
438 438 bg_out = iter([out])
439 439 _p = None
440 440 elif err and fail_on_stderr:
441 441 text_err = err.decode()
442 442 raise OSError(
443 "Subprocess exited due to an error:\n{}".format(text_err))
443 f"Subprocess exited due to an error:\n{text_err}")
444 444
445 445 if ret_code_fail and fail_on_return_code:
446 446 text_err = err.decode()
447 447 if not err:
448 448 # maybe get empty stderr, try stdout instead
449 449 # in many cases git reports the errors on stdout too
450 450 text_err = out.decode()
451 451 raise OSError(
452 "Subprocess exited with non 0 ret code:{}: stderr:{}".format(return_code, text_err))
452 f"Subprocess exited with non 0 ret code:{return_code}: stderr:{text_err}")
453 453
454 454 self.stdout = bg_out
455 455 self.stderr = bg_err
456 456 self.inputstream = input_stream
457 457
458 458 def __str__(self):
459 459 proc = getattr(self, 'process', 'NO_PROCESS')
460 460 return f'SubprocessIOChunker: {proc}'
461 461
462 462 def __iter__(self):
463 463 return self
464 464
465 465 def __next__(self):
466 466 # Note: mikhail: We need to be sure that we are checking the return
467 467 # code after the stdout stream is closed. Some processes, e.g. git
468 468 # are doing some magic in between closing stdout and terminating the
469 469 # process and, as a result, we are not getting return code on "slow"
470 470 # systems.
471 471 result = None
472 472 stop_iteration = None
473 473 try:
474 474 result = next(self.stdout)
475 475 except StopIteration as e:
476 476 stop_iteration = e
477 477
478 478 if self.process:
479 479 return_code = self.process.poll()
480 480 ret_code_fail = return_code is not None and return_code != 0
481 481 if ret_code_fail and self._fail_on_return_code:
482 482 self.stop_streams()
483 483 err = self.get_stderr()
484 484 raise OSError(
485 "Subprocess exited (exit_code:{}) due to an error during iteration:\n{}".format(return_code, err))
485 f"Subprocess exited (exit_code:{return_code}) due to an error during iteration:\n{err}")
486 486
487 487 if stop_iteration:
488 488 raise stop_iteration
489 489 return result
490 490
491 491 def throw(self, exc_type, value=None, traceback=None):
492 492 if self.stdout.length or not self.stdout.done_reading:
493 493 raise exc_type(value)
494 494
495 495 def close(self):
496 496 if self._closed:
497 497 return
498 498
499 499 try:
500 500 self.process.terminate()
501 501 except Exception:
502 502 pass
503 503 if self._close_input_fd:
504 504 os.close(self._close_input_fd)
505 505 try:
506 506 self.stdout.close()
507 507 except Exception:
508 508 pass
509 509 try:
510 510 self.stderr.close()
511 511 except Exception:
512 512 pass
513 513 try:
514 514 os.close(self.inputstream)
515 515 except Exception:
516 516 pass
517 517
518 518 self._closed = True
519 519
520 520 def stop_streams(self):
521 521 getattr(self.stdout, 'stop', lambda: None)()
522 522 getattr(self.stderr, 'stop', lambda: None)()
523 523
524 524 def get_stdout(self):
525 525 if self._stdout:
526 526 return self._stdout
527 527 else:
528 528 return b''.join(self.stdout)
529 529
530 530 def get_stderr(self):
531 531 if self._stderr:
532 532 return self._stderr
533 533 else:
534 534 return b''.join(self.stderr)
535 535
536 536
537 537 def run_command(arguments, env=None):
538 538 """
539 539 Run the specified command and return the stdout.
540 540
541 541 :param arguments: sequence of program arguments (including the program name)
542 542 :type arguments: list[str]
543 543 """
544 544
545 545 cmd = arguments
546 546 log.debug('Running subprocessio command %s', cmd)
547 547 proc = None
548 548 try:
549 549 _opts = {'shell': False, 'fail_on_stderr': False}
550 550 if env:
551 551 _opts.update({'env': env})
552 552 proc = SubprocessIOChunker(cmd, **_opts)
553 553 return b''.join(proc), b''.join(proc.stderr)
554 554 except OSError as err:
555 555 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
556 556 tb_err = ("Couldn't run subprocessio command (%s).\n"
557 557 "Original error was:%s\n" % (cmd, err))
558 558 log.exception(tb_err)
559 559 raise Exception(tb_err)
560 560 finally:
561 561 if proc:
562 562 proc.close()
563 563
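A hedged usage sketch of run_command: it forces shell=False, so the command must be an argv list, and it returns a (stdout, stderr) tuple of bytes:

    stdout, stderr = run_command(['git', '--version'])
    print(stdout.decode().strip())   # e.g. 'git version 2.x.y'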
@@ -1,123 +1,123 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import base64
18 18 import time
19 19 import logging
20 20
21 21 import msgpack
22 22
23 23 import vcsserver
24 24 from vcsserver.str_utils import safe_str, ascii_str
25 25
26 26 log = logging.getLogger(__name__)
27 27
28 28
29 29 def get_access_path(environ):
30 30 path = environ.get('PATH_INFO')
31 31 return path
32 32
33 33
34 34 def get_user_agent(environ):
35 35 return environ.get('HTTP_USER_AGENT')
36 36
37 37
38 38 def get_call_context(request) -> dict:
39 39 cc = {}
40 40 registry = request.registry
41 41 if hasattr(registry, 'vcs_call_context'):
42 42 cc.update({
43 43 'X-RC-Method': registry.vcs_call_context.get('method'),
44 44 'X-RC-Repo-Name': registry.vcs_call_context.get('repo_name')
45 45 })
46 46
47 47 return cc
48 48
49 49
50 50 def get_headers_call_context(environ, strict=True):
51 51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 53 return msgpack.unpackb(packed_cc)
54 54 elif strict:
55 55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56 56
57 57
58 58 class RequestWrapperTween(object):
59 59 def __init__(self, handler, registry):
60 60 self.handler = handler
61 61 self.registry = registry
62 62
63 63 # one-time configuration code goes here
64 64
65 65 def __call__(self, request):
66 66 start = time.time()
67 67 log.debug('Starting request time measurement')
68 68 response = None
69 69
70 70 try:
71 71 response = self.handler(request)
72 72 finally:
73 73 ua = get_user_agent(request.environ)
74 74 call_context = get_call_context(request)
75 75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
76 76 repo_name = call_context.get('X-RC-Repo-Name', '')
77 77
78 78 count = request.request_count()
79 79 _ver_ = vcsserver.__version__
80 80 _path = safe_str(get_access_path(request.environ))
81 81
82 82 ip = '127.0.0.1'
83 83 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
84 84 resp_code = getattr(response, 'status_code', 'UNDEFINED')
85 85
86 86 _view_path = f"{repo_name}@{_path}/{vcs_method}"
87 87
88 88 total = time.time() - start
89 89
90 90 log.info(
91 91 'Req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s',
92 92 count, ip, request.environ.get('REQUEST_METHOD'),
93 93 _view_path, total, ua, _ver_,
94 94 extra={"time": total, "ver": _ver_, "code": resp_code,
95 95 "path": _path, "view_name": match_route, "user_agent": ua,
96 96 "vcs_method": vcs_method, "repo_name": repo_name}
97 97 )
98 98
99 99 statsd = request.registry.statsd
100 100 if statsd:
101 101 match_route = request.matched_route.name if request.matched_route else _path
102 102 elapsed_time_ms = round(1000.0 * total) # use ms only
103 103 statsd.timing(
104 104 "vcsserver_req_timing.histogram", elapsed_time_ms,
105 105 tags=[
106 "view_name:{}".format(match_route),
107 "code:{}".format(resp_code)
106 f"view_name:{match_route}",
107 f"code:{resp_code}"
108 108 ],
109 109 use_decimals=False
110 110 )
111 111 statsd.incr(
112 112 "vcsserver_req_total", tags=[
113 "view_name:{}".format(match_route),
114 "code:{}".format(resp_code)
113 f"view_name:{match_route}",
114 f"code:{resp_code}"
115 115 ])
116 116
117 117 return response
118 118
119 119
120 120 def includeme(config):
121 121 config.add_tween(
122 122 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
123 123 )
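For orientation, a hedged sketch of how a Pyramid application would activate this tween: config.include() on the module path invokes the includeme() above (standard Pyramid behavior); registry.statsd is assumed to be configured elsewhere in vcsserver:

    from pyramid.config import Configurator

    config = Configurator()
    config.include('vcsserver.tweens.request_wrapper')  # runs includeme(config)
    app = config.make_wsgi_app()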