|
|
# RhodeCode VCSServer provides access to different vcs backends via network.
|
|
|
# Copyright (C) 2014-2023 RhodeCode GmbH
|
|
|
#
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
# (at your option) any later version.
|
|
|
#
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
# GNU General Public License for more details.
|
|
|
#
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
|
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
|
import collections
|
|
|
import logging
|
|
|
import os
|
|
|
import re
|
|
|
import stat
|
|
|
import traceback
|
|
|
import urllib.request
|
|
|
import urllib.parse
|
|
|
import urllib.error
|
|
|
from functools import wraps
|
|
|
|
|
|
import more_itertools
|
|
|
import pygit2
|
|
|
from pygit2 import Repository as LibGit2Repo
|
|
|
from pygit2 import index as LibGit2Index
|
|
|
from dulwich import index, objects
|
|
|
from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
|
|
|
from dulwich.errors import (
|
|
|
NotGitRepository, ChecksumMismatch, WrongObjectException,
|
|
|
MissingCommitError, ObjectMissing, HangupException,
|
|
|
UnexpectedCommandError)
|
|
|
from dulwich.repo import Repo as DulwichRepo
|
|
|
|
|
|
import rhodecode
|
|
|
from vcsserver import exceptions, settings, subprocessio
|
|
|
from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
|
|
|
from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
|
|
|
from vcsserver.hgcompat import (
|
|
|
hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
|
|
|
from vcsserver.git_lfs.lib import LFSOidStore
|
|
|
from vcsserver.vcs_base import RemoteBase
|
|
|
|
|
|
# File-mode helpers: the directory type bits and the stdlib callable that
# extracts the file-type portion of a mode.
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
# dulwich's S_IFGITLINK mode constant
GIT_LINK = objects.S_IFGITLINK

# Suffix of peeled tag references; such refs are skipped when syncing.
PEELED_REF_MARKER = b'^{}'
# Name of the HEAD reference as bytes.
HEAD_MARKER = b'HEAD'

# Module-level logger.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
def reraise_safe_exceptions(func):
    """
    Decorator translating Dulwich errors into neutral vcsserver exceptions.

    Checksum/wrong-object/missing-commit/missing-object errors are re-raised
    through ``exceptions.LookupException``; hang-up and unexpected-command
    errors through ``exceptions.VcsException``. Any other exception is
    propagated untouched.
    """
    lookup_errors = (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,)
    protocol_errors = (HangupException, UnexpectedCommandError)

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except lookup_errors as err:
            raise exceptions.LookupException(org_exc=err)(safe_str(err))
        except protocol_errors as err:
            raise exceptions.VcsException(org_exc=err)(safe_str(err))
        except Exception:
            # NOTE(marcink): because of how dulwich handles some exceptions
            # (KeyError on empty repos), we cannot track this and catch all
            # exceptions, so everything else is passed through as-is.
            raise

    return wrapper
|
|
|
|
|
|
|
|
|
class Repo(DulwichRepo):
    """
    Dulwich ``Repo`` subclass that releases its resources on destruction.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. Closing the repo when the object is
    destroyed releases those descriptors.
    """

    def __del__(self):
        # a partially constructed instance has no object store to close
        if not hasattr(self, 'object_store'):
            return
        self.close()
|
|
|
|
|
|
|
|
|
class Repository(LibGit2Repo):
    """
    pygit2 repository usable as a context manager.

    Leaving the ``with`` block calls ``free()`` on the repository.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # returns None, so exceptions raised inside the block still propagate
        self.free()
|
|
|
|
|
|
|
|
|
class GitFactory(RepoFactory):
    """
    Factory creating git repository objects from a ``wire`` dict.
    """
    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Build the repository object for ``wire['path']``.

        :param wire: dict holding at least the repository ``path``
        :param create: accepted for interface compatibility, not used here
        :param use_libgit2: return a pygit2-based ``Repository`` instead of
            the dulwich-based ``Repo``
        """
        if not use_libgit2:
            # dulwich mode
            dulwich_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            repo = Repo(dulwich_path)
        else:
            repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', repo)
        return repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Return a repository instance for the path given in ``wire``.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """
        Shortcut returning the pygit2-based repository for ``wire``.
        """
        return self.repo(wire, use_libgit2=True)
|
|
|
|
|
|
|
|
|
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature out of a 'Name <email>' string.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to pygit2.Signature
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not follow the format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if not parsed:
        raise ValueError(f"Invalid format: {author_str}")

    return pygit2.Signature(parsed.group(1), parsed.group(2), **kwargs)
|
|
|
|
|
|
|
|
|
def get_obfuscated_url(url_obj):
    """
    Return the string form of ``url_obj`` with secrets masked.

    NOTE: ``url_obj`` is modified in place: a non-empty password is replaced
    with ``b'*****'`` and the query is passed through ``obfuscate_qs``.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
|
|
|
|
|
|
|
|
|
class GitRemote(RemoteBase):
|
|
|
|
|
|
def __init__(self, factory):
    """
    Store the repository factory and register the bulk-request dispatch tables.

    :param factory: object used to create repository instances from a wire
    """
    self._factory = factory
    # attribute name -> commit-level getter, used by `bulk_request`
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )
    # attribute name -> file-node getter, used by `bulk_file_request`
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
|
|
|
|
|
|
def _wire_to_config(self, wire):
|
|
|
if 'config' in wire:
|
|
|
return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
|
|
|
return {}
|
|
|
|
|
|
def _remote_conf(self, config):
|
|
|
params = [
|
|
|
'-c', 'core.askpass=""',
|
|
|
]
|
|
|
config_attrs = {
|
|
|
'vcs_ssl_dir': 'http.sslCAinfo={}',
|
|
|
'vcs_git_lfs_store_location': 'lfs.storage={}'
|
|
|
}
|
|
|
for key, param in config_attrs.items():
|
|
|
if value := config.get(key):
|
|
|
params.extend(['-c', param.format(value)])
|
|
|
return params
|
|
|
|
|
|
@reraise_safe_exceptions
def discover_git_version(self):
    """
    Return the output of ``git --version`` as a string, without the leading
    ``git version`` text and surrounding whitespace.
    """
    raw_version, _ = self.run_git_command(
        {}, ['--version'], _bare=True, _safe=True)
    return safe_str(raw_version.removeprefix(b'git version').strip())
|
|
|
|
|
|
@reraise_safe_exceptions
def is_empty(self, wire):
    """
    Tell whether the repository has no content.

    A repository whose HEAD has a name is reported as non-empty; otherwise
    the libgit2 ``is_empty`` flag decides. Any error raised while inspecting
    the repository is treated as "empty".
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False

            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: failing to read HEAD is reported as empty
            return True
|
|
|
|
|
|
@reraise_safe_exceptions
def assert_correct_path(self, wire):
    """
    Check (with optional caching) that ``wire['path']`` points at a git repository.

    The public entry point always uses the fast check based on
    ``pygit2.discover_repository``; the slower variant opens the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        try:
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            log.debug("Invalid Git path `%s`, tb: %s",
                      wire.get('path'), traceback.format_exc())
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
|
|
|
|
|
|
@reraise_safe_exceptions
def bare(self, wire):
    """
    Return the ``is_bare`` flag of the repository behind ``wire``.
    """
    with self._factory.repo_libgit2(wire) as repo:
        return repo.is_bare
|
|
|
|
|
|
@reraise_safe_exceptions
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob at ``path`` in ``commit_id``, wrapped in
    a ``BytesEnvelope``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
|
|
|
|
|
|
@reraise_safe_exceptions
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob at ``path`` in ``commit_id``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
|
|
|
|
|
|
@reraise_safe_exceptions
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob at ``path`` in ``commit_id``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
|
|
|
|
|
|
@reraise_safe_exceptions
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return the ``is_binary`` flag of the blob at ``path`` in ``commit_id``.

    Raises a lookup exception when the tree entry is not a blob.
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]

        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
|
|
|
|
|
|
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
    """
    Return the data of the object ``sha`` wrapped in a ``BytesEnvelope``.
    """
    with self._factory.repo_libgit2(wire) as repo:
        return BytesEnvelope(repo[sha].data)
|
|
|
|
|
|
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
    """
    Return the size of the object ``sha`` (result cached per repo and sha
    when caching is enabled for the wire).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        # arguments only form the cache key; the lookup uses the outer `sha`
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
|
|
|
|
|
|
def _parse_lfs_pointer(self, raw_content):
|
|
|
spec_string = b'version https://git-lfs.github.com/spec'
|
|
|
if raw_content and raw_content.startswith(spec_string):
|
|
|
|
|
|
pattern = re.compile(rb"""
|
|
|
(?:\n)?
|
|
|
^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
|
|
|
^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
|
|
|
^size[ ](?P<oid_size>[0-9]+)\n
|
|
|
(?:\n)?
|
|
|
""", re.VERBOSE | re.MULTILINE)
|
|
|
match = pattern.match(raw_content)
|
|
|
if match:
|
|
|
return match.groupdict()
|
|
|
|
|
|
return {}
|
|
|
|
|
|
@reraise_safe_exceptions
def is_large_file(self, wire, commit_id):
    """
    Return parsed git-lfs pointer data for the object ``commit_id``.

    Binary objects and objects that are not LFS pointers yield an empty dict.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            blob = repo[commit_id]
            # the pointer regex is only tried on non-binary objects
            return {} if blob.is_binary else self._parse_lfs_pointer(blob.data)

    return _is_large_file(repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def is_binary(self, wire, tree_id):
    """
    Return the ``is_binary`` flag of the object ``tree_id`` (cached per repo
    and id when caching is enabled for the wire).
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def md5_hash(self, wire, commit_id, path):
    """
    Validate that ``path`` in ``commit_id`` is a blob and return an empty string.

    No md5 is computed here; a lookup exception is raised when the tree
    entry is not a blob.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]

            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
|
|
|
|
|
|
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` is present in the configured LFS store.

    Returns False when no ``vcs_git_lfs_store_location`` is configured.
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    return LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location).has_oid()
|
|
|
|
|
|
@reraise_safe_exceptions
def store_path(self, wire, oid):
    """
    Return the path of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    return LFSOidStore(
        oid=oid, repo=repo_name, store_location=store_location).oid_path
|
|
|
|
|
|
@reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
    """
    Collect several commit attributes for ``rev`` in one call.

    :param pre_load: attribute names; each must be a key of ``_bulk_methods``
    :return: dict mapping attribute name to the value from the matching getter

    A ``KeyError`` raised while handling an attribute is reported as a
    ``VcsException`` ("Unknown bulk attribute").
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                loader = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire['cache'] = False
                collected[attr] = loader(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
|
|
|
|
|
|
@reraise_safe_exceptions
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Collect several file-node attributes for ``path`` at ``commit_id`` in one call.

    :param pre_load: attribute names; each must be a key of ``_bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute name -> value

    A ``KeyError`` raised while handling an attribute is reported as a
    ``VcsException`` ("Unknown bulk attribute").
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                loader = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire['cache'] = False
                collected[attr] = loader(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    bulk_result = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(bulk_result)
|
|
|
|
|
|
def _build_opener(self, url: str):
    """
    Create a urllib opener for ``url``.

    When the URL carries auth info, basic and digest auth handlers sharing
    one password manager are installed on the opener.
    """
    authinfo = url_parser(safe_bytes(url)).authinfo()[1]

    handlers = []
    if authinfo:
        # create a password manager
        passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passmgr.add_password(*convert_to_str(authinfo))
        handlers += [httpbasicauthhandler(passmgr), httpdigestauthhandler(passmgr)]

    return urllib.request.build_opener(*handlers)
|
|
|
|
|
|
@reraise_safe_exceptions
def check_url(self, url, config):
    """
    Verify that ``url`` answers like a git smart/dumb HTTP remote.

    The ``info/refs?service=git-upload-pack`` endpoint is requested and the
    response body is checked for git markers.

    :param url: remote URL, may embed credentials
    :param config: accepted for interface compatibility, not used here
    :return: True when the URL looks like a git repository
    :raises exceptions.URLError: when the URL cannot be opened, does not
        answer with 200, or the body does not look like git data
    """
    url_obj = url_parser(safe_bytes(url))

    # the clean URI has to be read before obfuscation, which edits url_obj in place
    test_uri = safe_str(url_obj.authinfo()[0])
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            # release the connection before reporting the failure
            resp.close()
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo
    try:
        gitdata: bytes = resp.read()
    finally:
        # the response was previously left open
        resp.close()

    looks_like_smart_http = b'service=git-upload-pack' in gitdata
    # old style git can return some other format!
    looks_like_dumb_http = bool(re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata))

    if not (looks_like_smart_http or looks_like_dumb_http):
        raise exceptions.URLError(None)(
            f"url [{obfuscated_uri}] does not look like a git repo")

    return True
|
|
|
|
|
|
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Fetch from ``url`` and set matching refs in the local repository.

    Only refs starting with one of ``valid_refs`` and not ending with
    ``deferred`` are applied. With ``update_after_clone`` HEAD is set from
    the remote and the index is built from its tree.
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)
    if isinstance(valid_refs, list):
        # str.startswith accepts a tuple of prefixes, not a list
        valid_refs = tuple(valid_refs)

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(valid_refs) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if update_after_clone:
        # we want to checkout HEAD
        repo["HEAD"] = remote_refs["HEAD"]
        index.build_index_from_tree(
            repo.path, repo.index_path(), repo.object_store, repo["HEAD"].tree)
|
|
|
|
|
|
@reraise_safe_exceptions
def branch(self, wire, commit_id):
    """
    Return the names of branches whose head is exactly ``commit_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        heads_re = re.compile('^refs/heads')
        all_refs = list(self.get_refs(wire).items())
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, target in all_refs
            if heads_re.match(ref_name) and target == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def commit_branches(self, wire, commit_id):
    """
    Return the list of branches reported by libgit2 as containing ``commit_id``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository and return its id.

    The blob is built and stored with dulwich, so the dulwich repository is
    used: ``object_store`` is part of the dulwich ``Repo`` API, and the
    libgit2 repository opened here before does not provide it.

    :param content: raw bytes of the blob
    :return: id of the stored dulwich blob
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
|
|
|
|
|
|
@reraise_safe_exceptions
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int] | None = None,
                  parents: list | None = None):
    """
    Create a commit on ``refs/heads/<branch>`` and return its id as a string.

    :param author: 'Name <email>' string (or an already built signature)
    :param committer: 'Name <email>' string (or an already built signature)
    :param message: commit message
    :param branch: short branch name to update
    :param new_tree_id: tree id; str/bytes ids are validated against the repo
    :param date_args: optional ``[time, offset]`` pair used for both signatures
    :param parents: optional list of parent commit ids; when empty the parent
        is the branch head, or repo HEAD if the branch does not exist yet but
        other local branches do
    """
    with self._factory.repo_libgit2(wire) as repo:

        # Signatures used to be built only when `date_args` was given, which
        # left plain strings for the default `date_args=None`. They are now
        # always built from strings; without `date_args` no time/offset is
        # passed, so pygit2.Signature falls back to its own defaults.
        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {
                'time': current_time,
                'offset': offset
            }
        if isinstance(author, str):
            author = create_signature_from_string(author, **signature_kw)
        if isinstance(committer, str):
            committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[c].id for c in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case we need to make
            if branch in repo.branches.local:
                parents += [repo.branches[branch].target]
            elif list(repo.branches.local):
                parents += [repo.head.target]

        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        new_commit_id = safe_str(commit_oid)

    return new_commit_id
|
|
|
|
|
|
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write file changes into the index, create a commit and point ``branch`` at it.

    :param commit_data: dict with ``author``, ``committer``, ``message``,
        ``parents``, ``commit_time`` and ``commit_timezone``
    :param commit_tree: when truthy (and parents are given) the index is
        seeded from the first parent's tree
    :param updated: iterable of dicts with ``path``, ``content`` and ``mode``
    :param removed: iterable of paths to remove from the index
    :return: id of the new commit
    """

    def mode2pygit(mode):
        """
        Map a numeric file mode to the pygit2 filemode constant.

        git only supports two filemode 644 and 755 (plus links):

        0o100755 -> 33261
        0o100644 -> 33188

        Unknown modes fall back to a regular blob.
        """
        supported = {
            0o100644: pygit2.GIT_FILEMODE_BLOB,
            0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            0o120000: pygit2.GIT_FILEMODE_LINK
        }
        return supported.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            # start from the state of the first parent
            repo_index.read_tree(repo[commit_parents[0]].tree)

        for file_spec in updated:
            blob_id = repo.create_blob(file_spec['content'])
            entry = pygit2.IndexEntry(
                file_spec['path'], blob_id, mode2pygit(file_spec['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

    date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

    new_commit_id = self.create_commit(
        wire, commit_data['author'], commit_data['committer'], commit_data['message'],
        branch, new_tree_id, date_args=date_args, parents=commit_parents)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
|
|
|
|
|
|
@reraise_safe_exceptions
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch objects from ``url`` into the repository using dulwich.

    :param url: local path (no ``://``) or remote URL
    :param apply_refs: write the fetched refs into the local repository
    :param refs: optional ref names limiting what is fetched and returned
    :param update_after: set HEAD from the remote and rebuild the index
    :return: mapping of remote refs (limited to ``refs`` when given)
    """
    is_local_source = url != 'default' and '://' not in url
    if is_local_source:
        client = LocalGitClient(url)
    else:
        parsed_url = url_parser(safe_bytes(url))
        opener = self._build_opener(url)
        # continue with the credential-free URL
        url = parsed_url.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=opener)
    repo = self._factory.repo(wire)

    determine_wants = repo.object_store.determine_wants_all

    if refs:
        refs = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(_remote_refs):
            return [
                safe_bytes(ref_hash)
                for ref_name, ref_hash in _remote_refs.items()
                if safe_bytes(ref_name) in refs
            ]

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)
    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for ref_name in remote_refs:
            if ref_name.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", ref_name)
                continue
            repo[ref_name] = remote_refs[ref_name]

        if refs and not update_after:
            # update to ref
            # mikhail: explicitly set the head to the last ref.
            update_to_ref = update_after if isinstance(update_after, str) else refs[-1]
            repo[HEAD_MARKER] = remote_refs[update_to_ref]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(
            repo.path, repo.index_path(), repo.object_store, repo[HEAD_MARKER].tree)

    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
|
|
|
|
|
|
@reraise_safe_exceptions
def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
    """
    Mirror refs from ``url`` into the repository using the git binary.

    The remote is listed with ``git ls-remote`` and then fetched in chunks
    of 128 refspecs.

    :param refs: optional commit id (or list/tuple of ids); when given only
        remote refs pointing at one of these ids are fetched
    :param all_refs: list every remote ref instead of only heads and tags
    :param kwargs: ``sync_large_objects`` additionally runs ``git lfs fetch --all``
    :return: ordered mapping of remote ref name (bytes) -> sha (bytes)
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # `git ls-remote` output is bytes. The requested ids are normalised to
    # bytes once, because comparing a bytes sha against str ids never matches
    # and would silently fetch nothing.
    wanted_shas = {safe_bytes(x) for x in refs} if refs else set()

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, ref = ref_line.split(b'\t')
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref in [HEAD_MARKER]:
            continue

        remote_refs[ref] = sha

        # without a filter every ref is fetched, otherwise only requested shas
        if not refs or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        for chunk in more_itertools.chunked(fetch_refs, 128):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})
        if kwargs.get('sync_large_objects'):
            self.run_git_command(
                wire, ['lfs', 'fetch', url, '--all'],
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
            )

    return remote_refs
|
|
|
|
|
|
@reraise_safe_exceptions
def sync_push(self, wire, url, refs=None, **kwargs):
    """
    Mirror-push the repository to ``url`` using the git binary.

    Nothing is pushed when ``check_url`` returns a falsy value. With the
    ``sync_large_objects`` keyword a ``git lfs push --all`` follows the push.
    """
    if not self.check_url(url, wire):
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    push_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, push_cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if not kwargs.get('sync_large_objects'):
        return

    lfs_cmd = ['lfs', 'push', url, '--all']
    self.run_git_command(
        wire, lfs_cmd,
        fail_on_stderr=False,
        _copts=self._remote_conf(config),
    )
|
|
|
|
|
|
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
    """
    Open the repository located at ``url`` with dulwich and return its refs.

    ``wire`` is not used by this call.
    """
    remote_repo = Repo(url)
    refs = remote_repo.get_refs()
    return refs
|
|
|
|
|
|
@reraise_safe_exceptions
def get_description(self, wire):
    """
    Return the description of the repository as reported by dulwich.
    """
    dulwich_repo = self._factory.repo(wire)
    description = dulwich_repo.get_description()
    return description
|
|
|
|
|
|
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    Return ids of commits reachable from ``rev2`` but not from ``rev1``.

    Both repositories first fetch from each other (without thin packs);
    the walk itself runs on the repository at ``other_repo_path``.
    """
    origin_repo_path = wire['path']
    repo = self._factory.repo(wire)
    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    repo_remote = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)

    walker = repo_remote.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
|
|
|
|
|
|
@reraise_safe_exceptions
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve ``sha`` (commit id or reference) to basic object information.

    Tag objects are followed to their target. When ``sha`` is a full commit
    id, it is not a tag and ``maybe_unreachable`` is false, the commit must
    be contained in at least one branch, otherwise it is treated as missing.

    :return: dict with ``id``, ``type``, ``commit_id`` and ``idx`` (always 0)
    :raises: a lookup exception when the object cannot be resolved
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = isinstance(commit, pygit2.Tag)
            if is_tag:
                commit = repo.get(commit.target)

            resolved_id = commit.hex
            # tags, explicitly allowed unreachable commits and anything
            # resolved through a reference (sha differs from the resolved id)
            # are not checked for being dangling
            check_dangling = not is_tag and not maybe_unreachable and sha == resolved_id

            if check_dangling and not any(repo.branches.with_commit(resolved_id)):
                # NOTE(marcink): Empty error doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                raise exceptions.LookupException(e)(missing_commit_err)

            return {
                'id': resolved_id,
                'type': commit.type_str,
                'commit_id': resolved_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
|
|
|
|
|
|
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return ``{ref name: target hex}`` for all references under
    ``refs/heads/`` and ``refs/tags/``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            matching = [
                ref for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            ]
            return {ref.name: ref.target.hex for ref in matching}

    return _get_refs(context_uid, repo_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a dict mapping target hex id to branch shorthand for every
    reference whose name starts with ``refs/heads``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_only = re.compile('^refs/heads')
        with repo_init as repo:
            return {
                ref.target.hex: ref.shorthand
                for ref in repo.listall_reference_objects()
                if heads_only.match(ref.name)
            }

    return _get_branch_pointers(context_uid, repo_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    When resolving HEAD fails the error is re-raised if ``show_exc`` is
    truthy, otherwise ``None`` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                head_id = repo.head.peel().hex
            except Exception:
                if not show_exc:
                    # caller asked for silence: signal "no HEAD" with None
                    return None
                raise
            return head_id

    return _head(context_uid, repo_id, show_exc)
|
|
|
|
|
|
@reraise_safe_exceptions
def init(self, wire):
    """
    Create the directory at ``wire['path']`` (mode ``0o755``) and
    initialise a non-bare repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
|
|
|
|
|
|
@reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create the directory at ``wire['path']`` (mode ``0o755``) and
    initialise a bare repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
|
|
|
|
|
|
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up ``rev`` in the repository and return a dict with its ``id``
    and, when the object exposes ``tree_id``, its ``tree`` hex id.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            git_obj = repo[rev]
            described = {'id': git_obj.id.hex}
            # tree objects themselves have no tree_id attribute
            if hasattr(git_obj, 'tree_id'):
                described['tree'] = git_obj.tree_id.hex
            return described

    return _revision(context_uid, repo_id, rev)
|
|
|
|
|
|
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

    Objects without a ``commit_time`` attribute are first resolved via
    ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            if not hasattr(commit, 'commit_time'):
                commit = commit.get_object()

            # TODO(marcink): check dulwich difference of offset vs timezone
            return [commit.commit_time, commit.commit_time_offset]

    return _date(repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of ``commit_id`` as ``"name <email>"``, or just the
    name when the signature has no email.

    If formatting the name fails, the raw name is converted with
    ``safe_str`` and returned instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            # objects without an `author` attribute are resolved first
            source = commit if hasattr(commit, 'author') else commit.get_object()
            author = source.author

            if author.email:
                return f"{author.name} <{author.email}>"

            try:
                return f"{author.name}"
            except Exception:
                return f"{safe_str(author.raw_name)}"

    return _author(repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """Return the ``message`` attribute of the object stored under ``commit_id``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the list of parent hex ids of ``commit_id``.

    Objects without a ``parent_ids`` attribute are first resolved via
    ``get_object()``.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            if not hasattr(commit, 'parent_ids'):
                commit = commit.get_object()

            return [parent.hex for parent in commit.parent_ids]

    return _parents(repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the child commit ids of ``commit_id``.

    Runs ``git rev-list --all --children <commit_id>^..<head>`` and takes
    the ids listed after ``commit_id`` on the first output line that
    starts with it. Returns an empty list when no line matches.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_list_args = ['rev-list', '--all', '--children', f'{commit_id}^..{head}']
        output, __ = self.run_git_command(wire, rev_list_args)

        starts_with_commit = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if starts_with_commit.match(text):
                # line layout is "<commit> <child> <child> ..."
                return text.split(' ')[1:]

        return []

    return _children(repo_id, commit_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """Create reference ``key`` with target ``value``, passing ``force=True``."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
|
|
|
|
|
|
@reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Point the existing reference ``key`` at ``value``.

    Raises ``ValueError`` when ``key`` is not among the repository
    references.
    """
    with self._factory.repo_libgit2(wire) as repo:
        known_refs = repo.references
        if key not in known_refs:
            raise ValueError(f'Reference {key} not found in the repository')
        repo.references.create(key, value, force=True)
|
|
|
|
|
|
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch ``branch_name``.

    :param branch_name: name of the branch to create
    :param commit_id: id of the object the branch should point at; when
        falsy the object HEAD peels to is used instead
    :param force: passed through to the branch creation call; when falsy
        the branch is only created if ``repo.branches.get`` finds none
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # `repo.head` is a property holding a reference, not a callable
            # (see `head()` in this class); peel it to the object it points at
            commit = repo.head.peel()

        # without `force` an already existing branch is left untouched
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
|
|
|
|
|
|
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """Delete the reference named ``key`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
|
|
|
|
|
|
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """Delete the reference ``refs/tags/<tag_name>`` from the repository."""
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(f'refs/tags/{tag_name}')
|
|
|
|
|
|
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits and classify the changed paths.

    :param source_id: id of the source commit; may be empty, in which case
        ``None`` is used as the source tree
    :param target_id: id of the target commit
    :return: tuple ``(added, modified, deleted)`` of path lists; a change
        with both paths counts as modified, with only a new path as added
        and with only an old path as deleted
    """
    repo = self._factory.repo(wire)

    # source can be empty
    source_id = safe_bytes(source_id or b'')
    target_id = safe_bytes(target_id)

    source_tree = repo[source_id].tree if source_id else None
    target_tree = repo[target_id].tree
    changes = list(repo.object_store.tree_changes(source_tree, target_tree))

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in changes:
        if new_path:
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
|
|
|
|
|
|
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Look up ``path`` inside the tree of ``commit_id``.

    :return: tuple ``(hex id, type string, filemode)`` of the entry, or
        ``(None, None, None)`` when the tree lookup raises ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None
            else:
                return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
|
|
|
|
|
|
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree stored under ``tree_id``.

    :return: list of ``(name, filemode, hex id, type)`` tuples, where
        entries of type ``commit`` are reported as ``link``
    :raises ObjectMissing: when the repository lookup raises ``KeyError``
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for entry in tree:
                entry_sha, entry_mode = entry.hex, entry.filemode
                entry_type = entry.type_str
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                if entry_type == 'commit':
                    entry_type = 'link'
                entries.append((entry.name, entry_mode, entry_sha, entry_type))
            return entries

    return _tree_items(repo_id, tree_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    :param commit_id_1: left side of the diff; when equal to
        ``self.EMPTY_COMMIT`` ``git show`` of ``commit_id_2`` is used
    :param commit_id_2: right side of the diff
    :param file_filter: optional path the diff is limited to
    :param opt_ignorews: when truthy ``--ignore-all-space`` is added
    :param context: number of context lines (``-U<context>``)
    :return: the diff output of the git command, as bytes
    """
    against_empty = commit_id_1 == self.EMPTY_COMMIT

    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    if against_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)

    if against_empty:
        # 'show' prints a commit header first: drop everything before the
        # first line that starts the actual diff
        lines = diff.splitlines()
        start = next(
            (pos for pos, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so the separator must be bytes too
        # (joining with a str separator raises TypeError). A trailing new
        # line is appended just like the 'diff' command does.
        diff = b'\n'.join(lines[start:]) + b'\n'

    return diff
|
|
|
|
|
|
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Build a diff between two commits with libgit2.

    :param commit_id_1: left side; when equal to ``self.EMPTY_COMMIT`` the
        tree of ``commit_id_2`` is diffed without a second tree
    :param commit_id_2: right side of the diff
    :param file_filter: when set, only the patch whose old file path equals
        it is returned (an empty envelope if none matches)
    :param opt_ignorews: when truthy whitespace changes are ignored
    :param context: number of context lines
    :return: ``BytesEnvelope`` with the patch data
    """
    with self._factory.repo_libgit2(wire) as repo:
        diff_flags = pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            diff_flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        diff_kwargs = dict(flags=diff_flags, context_lines=context, swap=True)

        newer = repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = newer.tree.diff_to_tree(**diff_kwargs)
        else:
            older = repo[commit_id_1]
            diff_obj = newer.tree.diff_to_tree(older.tree, **diff_kwargs)
            # rename detection is only run when two real trees are compared
            diff_obj.find_similar(flags=pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')

        # no matching path == no diff
        return BytesEnvelope(b'')
|
|
|
|
|
|
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the 40-character hex ids found in the git history output for
    ``path``, starting at ``commit_id``.

    ``limit == 1`` uses ``rev-list -1``; otherwise ``log`` is used, with
    ``-n <limit>`` added when ``limit`` is truthy.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            count_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log'] + count_args + ['--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', output))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
|
|
|
|
|
|
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` by parsing ``git blame`` output.

    Returns a list of ``(line_no, blame_commit_id, line)`` tuples built
    from every output line except the last one.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    blame_cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, blame_cmd)

    annotated = []
    for line_no, blame_line in enumerate(output.splitlines()[:-1], start=1):
        # "<sha> <rest of the line>": split on the first space only
        blame_commit_id, line = blame_line.split(b' ', 1)
        annotated.append((line_no, blame_commit_id, line))

    return annotated
|
|
|
|
|
|
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate ``path`` at ``commit_id`` using the libgit2 blame.

    Returns a ``BinaryEnvelope`` wrapping a list of
    ``(line_no, blame_commit_id, line)`` tuples, one per line of the file
    content as split by ``splitnewlines``.
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame = repo.blame(path, newest_commit=commit_id)
        content = commit.tree[path].data

        for line_no, line in enumerate(splitnewlines(content), start=1):
            origin_commit_id = blame.for_line(line_no).final_commit_id.hex
            annotated.append((line_no, origin_commit_id, line))

    return BinaryEnvelope(annotated)
|
|
|
|
|
|
@reraise_safe_exceptions
def update_server_info(self, wire, force=False):
    """
    Run ``git update-server-info`` (with ``--force`` when ``force`` is
    truthy) and return its output split into lines.
    """
    extra_args = ['--force'] if force else []
    output, __ = self.run_git_command(wire, ['update-server-info'] + extra_args)
    return output.splitlines()
|
|
|
|
|
|
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    ``git rev-list --reverse --date-order --branches --tags``.

    Any error raised by the command (e.g. for empty repositories) results
    in an empty list.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):

        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    # NOTE: alternative libgit2 based implementation, currently not called
    # from this method.
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
        results = []
        with repo_init as repo:
            # NOTE(review): GIT_BRANCH_ALL is mixed into the sort flags here;
            # it looks like a branch-type constant - TODO confirm the intent
            for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
                results.append(commit.id.hex)
        # hand the collected ids back; previously they were dropped and the
        # helper always returned None
        return results

    return _get_all_commit_ids(context_uid, repo_id)
|
|
|
|
|
|
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with ``cmd`` and return ``(stdout, stderr)``
    as bytes.

    :param wire: mapping; when it holds an existing directory under
        ``path`` that directory becomes the working directory
    :param cmd: list of git arguments (without the executable)
    :param opts: forwarded to ``SubprocessIOChunker``, except for:
        ``_bare`` (presence drops the default ``-c`` options),
        ``_safe`` (presence makes an ``OSError`` return ``('', err)``
        instead of raising), ``_copts`` (extra options placed before
        ``cmd``) and ``extra_env`` (merged into the environment)
    :raises: the result of ``exceptions.VcsException()`` on ``OSError``
        when ``_safe`` was not given
    """
    path = wire.get('path')
    debug_mode = rhodecode.ConfigGet().get_bool('debug')

    if path and os.path.isdir(path):
        opts['cwd'] = path

    # for `_bare` and `_safe` only the presence of the key matters,
    # the value is never looked at
    if '_bare' in opts:
        opts.pop('_bare')
        _copts = []
    else:
        _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']

    # `_safe` means: no exception on failure
    safe_call = '_safe' in opts
    if safe_call:
        opts.pop('_safe')

    if '_copts' in opts:
        _copts.extend(opts.pop('_copts') or [])

    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        stdout = b''.join(proc)
        stderr = b''.join(proc.stderr)
        return stdout, stderr
    except OSError as err:
        readable_cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        # call options are only included in the message in debug mode
        call_opts = _opts if debug_mode else {}

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(readable_cmd, err, call_opts))
        log.exception(tb_err)
        if not safe_call:
            raise exceptions.VcsException()(tb_err)
        return '', err
    finally:
        if proc:
            proc.close()
|
|
|
|
|
|
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks for the repository at ``wire['path']``.

    :param force: passed to ``install_git_hooks`` as ``force_create``
    :return: whatever ``install_git_hooks`` returns
    """
    from vcsserver.hook_utils import install_git_hooks
    bare = self.bare(wire)
    path = wire['path']
    # The former `os.path.join(settings.BINARY_DIR, 'python3')` statement
    # was removed: its result was discarded, so it had no effect.
    return install_git_hooks(path, bare, force_create=force)
|
|
|
|
|
|
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the ``pre_version`` and ``post_version`` of the git
    hooks of the repository at ``wire['path']``.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
|
|
|
|
|
|
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at ``refs/heads/<head_name>``.

    Returns a two element list: ``head_name`` and a message describing
    the change.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
|
|
|
|
|
|
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of ``commit_id`` through ``store_archive_in_cache``.

    The nested ``file_walker`` generator is handed to
    ``store_archive_in_cache`` and yields one ``ArchiveNode`` per index
    entry of the requested tree, skipping entries whose mode is
    ``GIT_FILEMODE_COMMIT``.
    """

    def file_walker(_commit_id, path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            # an empty path or "/" means the root tree of the commit
            if path in ['', '/']:
                root = commit.tree
            else:
                root = commit.tree[path.rstrip('/')]

            tree_id = root.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path, entry_mode = entry.path, entry.mode
                is_link = stat.S_ISLNK(entry_mode)
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(entry_path, entry_mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)
|
|
|
|