git_remote.py
1526 lines
| 55.3 KiB
| text/x-python
|
PythonLexer
r1145 | # RhodeCode VCSServer provides access to different vcs backends via network. | |||
r1327 | # Copyright (C) 2014-2024 RhodeCode GmbH | |||
r1145 | # | |||
# This program is free software; you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation; either version 3 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
# along with this program; if not, write to the Free Software Foundation, | ||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
import collections | ||||
import logging | ||||
import os | ||||
import re | ||||
import stat | ||||
import traceback | ||||
import urllib.request | ||||
import urllib.parse | ||||
import urllib.error | ||||
from functools import wraps | ||||
import more_itertools | ||||
import pygit2 | ||||
from pygit2 import Repository as LibGit2Repo | ||||
from pygit2 import index as LibGit2Index | ||||
from dulwich import index, objects | ||||
from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult | ||||
from dulwich.errors import ( | ||||
NotGitRepository, ChecksumMismatch, WrongObjectException, | ||||
MissingCommitError, ObjectMissing, HangupException, | ||||
UnexpectedCommandError) | ||||
from dulwich.repo import Repo as DulwichRepo | ||||
r1310 | import vcsserver | |||
r1145 | from vcsserver import exceptions, settings, subprocessio | |||
r1249 | from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines | |||
r1145 | from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope | |||
from vcsserver.hgcompat import ( | ||||
hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler) | ||||
from vcsserver.git_lfs.lib import LFSOidStore | ||||
from vcsserver.vcs_base import RemoteBase | ||||
# File-type constants taken from ``stat`` / dulwich; their consumers are not
# part of the section reviewed here.
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
GIT_LINK = objects.S_IFGITLINK
# Ref names ending with this marker are skipped as "peeled references" when
# refs are applied or synced.
PEELED_REF_MARKER = b'^{}'
# Name of the HEAD ref, as bytes (the form used with dulwich repos and
# ``ls-remote`` output below).
HEAD_MARKER = b'HEAD'
# Module-level logger.
log = logging.getLogger(__name__)
def reraise_safe_exceptions(func):
    """
    Decorator converting Dulwich exceptions to something neutral.

    Object lookup failures are re-raised through ``exceptions.LookupException``
    and protocol failures through ``exceptions.VcsException``; both keep the
    original exception as ``org_exc``. Every other exception propagates
    untouched.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing) as lookup_err:
            make_exc = exceptions.LookupException(org_exc=lookup_err)
            raise make_exc(safe_str(lookup_err))
        except (HangupException, UnexpectedCommandError) as protocol_err:
            make_exc = exceptions.VcsException(org_exc=protocol_err)
            raise make_exc(safe_str(protocol_err))
        # NOTE(marcink): because of how dulwich handles some exceptions
        # (KeyError on empty repos), we cannot track and convert all the
        # remaining exceptions here; they may come from other handlers, so
        # anything not matched above is simply left to propagate.
        return outcome

    return wrapper
class Repo(DulwichRepo):
    """
    Thin subclass of the dulwich ``Repo``.

    Dulwich sometimes keeps .idx file descriptors open, which leads to
    "Too many open files" errors. To avoid that, all opened file descriptors
    are closed once the repo object is destroyed.
    """

    def __del__(self):
        # Only close instances that actually carry an ``object_store``.
        if not hasattr(self, 'object_store'):
            return
        self.close()
class Repository(LibGit2Repo):
    """libgit2 repository usable as a context manager; it is freed on exit."""

    def __enter__(self):
        """Return the repository itself for use inside the ``with`` body."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Free the repository; exceptions from the body are not suppressed."""
        self.free()
class GitFactory(RepoFactory):
    """Factory producing either dulwich or libgit2 repository objects."""

    repo_type = 'git'

    def _create_repo(self, wire, create, use_libgit2=False):
        """
        Open the repository located at ``wire['path']``.

        :param wire: call context; only its ``path`` key is read here
        :param create: accepted but not used by this method
        :param use_libgit2: open with libgit2 (``Repository``) instead of dulwich (``Repo``)
        :return: the opened repository object
        """
        if not use_libgit2:
            # dulwich mode
            decoded_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
            backend_repo = Repo(decoded_path)
        else:
            backend_repo = Repository(safe_bytes(wire['path']))

        log.debug('repository created: got GIT object: %s', backend_repo)
        return backend_repo

    def repo(self, wire, create=False, use_libgit2=False):
        """
        Get a repository instance for the given path.
        """
        return self._create_repo(wire, create, use_libgit2)

    def repo_libgit2(self, wire):
        """Shortcut for ``repo(wire, use_libgit2=True)``."""
        return self.repo(wire, use_libgit2=True)
def create_signature_from_string(author_str, **kwargs):
    """
    Build a pygit2.Signature from a string of the format 'Name <email>'.

    :param author_str: String of the format 'Name <email>'
    :param kwargs: extra keyword arguments forwarded to ``pygit2.Signature``
    :return: pygit2.Signature object
    :raises ValueError: when ``author_str`` does not match the expected format
    """
    parsed = re.match(r'^(.+) <(.+)>$', author_str)
    if parsed is not None:
        name, email = parsed.groups()
        return pygit2.Signature(name, email, **kwargs)

    raise ValueError(f"Invalid format: {author_str}")
def get_obfuscated_url(url_obj):
    """
    Return ``url_obj`` rendered as a string with secrets masked.

    NOTE: ``url_obj`` is modified in place: a set password is replaced by
    ``b'*****'`` and the query is passed through ``obfuscate_qs``.
    """
    if url_obj.passwd:
        url_obj.passwd = b'*****'
    url_obj.query = obfuscate_qs(url_obj.query)
    return str(url_obj)
class GitRemote(RemoteBase): | ||||
def __init__(self, factory):
    """
    Store the repository factory and register the bulk-request handlers.

    :param factory: object used by the methods of this class to open repositories
    """
    self._factory = factory

    # attribute name -> bound method, used by ``bulk_request``
    self._bulk_methods = dict(
        date=self.date,
        author=self.author,
        branch=self.branch,
        message=self.message,
        parents=self.parents,
        _commit=self.revision,
    )

    # attribute name -> bound method, used by ``bulk_file_request``
    self._bulk_file_methods = dict(
        size=self.get_node_size,
        data=self.get_node_data,
        flags=self.get_node_flags,
        is_binary=self.get_node_is_binary,
        md5=self.md5_hash,
    )
def _wire_to_config(self, wire):
    """
    Flatten ``wire['config']`` into a ``{'<first>_<second>': <third>}`` dict.

    Each config entry is indexed positionally: elements 0 and 1 are joined
    with an underscore to form the key and element 2 becomes the value.

    :return: the flattened dict, or an empty dict when ``wire`` has no 'config'
    """
    if 'config' not in wire:
        return {}

    flattened = {}
    for entry in wire['config']:
        flattened[entry[0] + '_' + entry[1]] = entry[2]
    return flattened
def _remote_conf(self, config):
    """
    Build the ``-c key=value`` git options used for remote operations.

    :param config: flattened config dict (see ``_wire_to_config``)
    :return: list of command line arguments; always contains the
        ``core.askpass`` override, plus one ``-c`` pair per configured option
    """
    option_templates = (
        ('vcs_ssl_dir', 'http.sslCAinfo={}'),
        ('vcs_git_lfs_store_location', 'lfs.storage={}'),
    )

    params = ['-c', 'core.askpass=""']
    for key, template in option_templates:
        value = config.get(key)
        # unset / empty options are left out entirely
        if value:
            params += ['-c', template.format(value)]
    return params
@reraise_safe_exceptions | ||||
def discover_git_version(self):
    """
    Run ``git --version`` and return its output as a string, with the
    leading ``git version`` marker removed when present.
    """
    version_out, _ = self.run_git_command({}, ['--version'], _bare=True, _safe=True)

    marker = b'git version'
    if version_out.startswith(marker):
        version_out = version_out[len(marker):]
    return safe_str(version_out.strip())
@reraise_safe_exceptions | ||||
def is_empty(self, wire):
    """
    Tell whether the repository has no content.

    :return: False when HEAD has a name; otherwise libgit2's ``is_empty``
        answer; True whenever any of those checks raises
    """
    with self._factory.repo_libgit2(wire) as repo:
        try:
            if repo.head.name:
                return False
            # NOTE(marcink): check again using more expensive method
            return repo.is_empty
        except Exception:
            # best effort: a repository whose HEAD cannot be inspected is
            # reported as empty
            return True
@reraise_safe_exceptions | ||||
def assert_correct_path(self, wire):
    """
    Check whether ``wire['path']`` points at a Git repository.

    The cached helper supports a fast check (repository discovery) and a
    slow check (actually opening the repository); this method always asks
    for the fast one.

    :return: True when a repository is found, False otherwise
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _assert_correct_path(_context_uid, _repo_id, fast_check):
        if fast_check:
            return bool(pygit2.discover_repository(safe_str(wire['path'])))

        try:
            # opening (and immediately releasing) the repo is the whole check
            with self._factory.repo_libgit2(wire):
                pass
        except pygit2.GitError:
            path = wire.get('path')
            tb = traceback.format_exc()
            log.debug("Invalid Git path `%s`, tb: %s", path, tb)
            return False
        return True

    return _assert_correct_path(context_uid, repo_id, True)
@reraise_safe_exceptions | ||||
def bare(self, wire):
    """Return the ``is_bare`` flag of the repository opened via libgit2."""
    with self._factory.repo_libgit2(wire) as repo:
        is_bare_repo = repo.is_bare
    return is_bare_repo
@reraise_safe_exceptions | ||||
def get_node_data(self, wire, commit_id, path):
    """
    Return the content of the blob at ``path`` in commit ``commit_id``.

    :return: the blob data wrapped in a ``BytesEnvelope``
    :raises: ``exceptions.LookupException`` when the entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return BytesEnvelope(node.data)

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
@reraise_safe_exceptions | ||||
def get_node_size(self, wire, commit_id, path):
    """
    Return the size of the blob at ``path`` in commit ``commit_id``.

    :raises: ``exceptions.LookupException`` when the entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.size

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
@reraise_safe_exceptions | ||||
def get_node_flags(self, wire, commit_id, path):
    """
    Return the file mode of the blob at ``path`` in commit ``commit_id``.

    :raises: ``exceptions.LookupException`` when the entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.filemode

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
@reraise_safe_exceptions | ||||
def get_node_is_binary(self, wire, commit_id, path):
    """
    Return libgit2's ``is_binary`` flag for the blob at ``path`` in commit
    ``commit_id``.

    :raises: ``exceptions.LookupException`` when the entry is not a blob
    """
    with self._factory.repo_libgit2(wire) as repo:
        node = repo[commit_id].tree[path]
        if node.type == pygit2.GIT_OBJ_BLOB:
            return node.is_binary

        raise exceptions.LookupException()(
            f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
@reraise_safe_exceptions | ||||
def blob_as_pretty_string(self, wire, sha):
    """Return the data of the object stored under ``sha`` as a ``BytesEnvelope``."""
    with self._factory.repo_libgit2(wire) as repo:
        raw_data = repo[sha].data
        return BytesEnvelope(raw_data)
@reraise_safe_exceptions | ||||
def blob_raw_length(self, wire, sha):
    """Return the ``size`` of the object stored under ``sha`` (conditionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _blob_raw_length(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[sha].size

    return _blob_raw_length(repo_id, sha)
def _parse_lfs_pointer(self, raw_content):
    """
    Parse a Git LFS pointer file.

    :param raw_content: raw bytes of a blob
    :return: dict with ``spec_ver``, ``oid_hash`` and ``oid_size`` (all bytes)
        when the content is an LFS pointer, otherwise an empty dict
    """
    spec_string = b'version https://git-lfs.github.com/spec'
    # cheap prefix test before running the regex
    if not raw_content or not raw_content.startswith(spec_string):
        return {}

    pattern = re.compile(rb"""
    (?:\n)?
    ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
    ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
    ^size[ ](?P<oid_size>[0-9]+)\n
    (?:\n)?
    """, re.VERBOSE | re.MULTILINE)

    found = pattern.match(raw_content)
    return found.groupdict() if found else {}
@reraise_safe_exceptions | ||||
def is_large_file(self, wire, commit_id):
    """
    Return the parsed LFS pointer data of the object stored under ``commit_id``.

    :return: empty dict for binary objects, otherwise the result of
        ``_parse_lfs_pointer`` on the object's data
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_large_file(_repo_id, _sha):
        with self._factory.repo_libgit2(wire) as repo:
            candidate = repo[commit_id]
            if not candidate.is_binary:
                return self._parse_lfs_pointer(candidate.data)
            return {}

    return _is_large_file(repo_id, commit_id)
@reraise_safe_exceptions | ||||
def is_binary(self, wire, tree_id):
    """Return the ``is_binary`` flag of the object stored under ``tree_id`` (conditionally cached)."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _is_binary(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[tree_id].is_binary

    return _is_binary(repo_id, tree_id)
@reraise_safe_exceptions | ||||
def md5_hash(self, wire, commit_id, path):
    """
    Validate that ``path`` in ``commit_id`` is a blob.

    No hash is computed: an empty string is returned for every blob.

    :raises: ``exceptions.LookupException`` when the entry is not a blob
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _md5_hash(_repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            node = repo[_commit_id].tree[_path]
            if node.type == pygit2.GIT_OBJ_BLOB:
                return ''

            raise exceptions.LookupException()(
                f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')

    return _md5_hash(repo_id, commit_id, path)
@reraise_safe_exceptions | ||||
def in_largefiles_store(self, wire, oid):
    """
    Tell whether ``oid`` is present in the configured LFS store.

    :return: the store's ``has_oid()`` answer, or False when no
        ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        return False

    lfs_store = LFSOidStore(oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.has_oid()
@reraise_safe_exceptions | ||||
def store_path(self, wire, oid):
    """
    Return the path of ``oid`` inside the configured LFS store.

    :raises ValueError: when no ``vcs_git_lfs_store_location`` is configured
    """
    conf = self._wire_to_config(wire)
    with self._factory.repo_libgit2(wire) as repo:
        repo_name = repo.path

    store_location = conf.get('vcs_git_lfs_store_location')
    if not store_location:
        raise ValueError(f'Unable to fetch oid with path {oid}')

    lfs_store = LFSOidStore(oid=oid, repo=repo_name, store_location=store_location)
    return lfs_store.oid_path
@reraise_safe_exceptions | ||||
def bulk_request(self, wire, rev, pre_load):
    """
    Collect several commit attributes in a single call.

    :param rev: revision passed to every handler from ``self._bulk_methods``
    :param pre_load: names of the attributes to compute
    :return: dict mapping each requested attribute to its handler's result
    :raises: ``exceptions.VcsException`` when a KeyError occurs while
        resolving or running a handler
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_request(_repo_id, _rev, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update(cache=False)
                collected[attr] = handler(wire, rev)
            except KeyError as e:
                raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
        return collected

    return _bulk_request(repo_id, rev, sorted(pre_load))
@reraise_safe_exceptions | ||||
def bulk_file_request(self, wire, commit_id, path, pre_load):
    """
    Collect several file-node attributes in a single call.

    :param pre_load: names of the attributes to compute, resolved through
        ``self._bulk_file_methods``
    :return: ``BinaryEnvelope`` wrapping a dict of attribute -> handler result
    :raises: ``exceptions.VcsException`` when a KeyError occurs while
        resolving or running a handler
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
        collected = {}
        for attr in pre_load:
            try:
                handler = self._bulk_file_methods[attr]
                # disable cache for bulk calls so we don't double cache
                wire.update(cache=False)
                collected[attr] = handler(wire, _commit_id, _path)
            except KeyError as e:
                raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
        return collected

    payload = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
    return BinaryEnvelope(payload)
def _build_opener(self, url: str):
    """
    Create a urllib opener for ``url``.

    When the URL carries auth info, basic and digest auth handlers sharing
    one password manager are installed; otherwise a plain opener is returned.
    """
    url_obj = url_parser(safe_bytes(url))
    authinfo = url_obj.authinfo()[1]
    if not authinfo:
        return urllib.request.build_opener()

    # create a password manager
    passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passmgr.add_password(*convert_to_str(authinfo))

    return urllib.request.build_opener(
        httpbasicauthhandler(passmgr),
        httpdigestauthhandler(passmgr))
@reraise_safe_exceptions | ||||
def check_url(self, url, config):
    """
    Verify that ``url`` answers like a Git remote that can be cloned/imported.

    The ``info/refs?service=git-upload-pack`` endpoint is requested and the
    response body is inspected for the smart-protocol service line or for
    old-style ``<sha> refs...`` lines.

    :param url: remote URL, may embed credentials (they are masked in logs
        and error messages)
    :param config: accepted but not used by this method
    :return: True when the remote looks like a Git repository
    :raises: ``exceptions.URLError`` when the URL cannot be opened, does not
        return HTTP 200, or the response does not look like a Git repository
    """
    url_obj = url_parser(safe_bytes(url))
    test_uri = safe_str(url_obj.authinfo()[0])
    # NOTE: masks the secrets on ``url_obj`` in place, hence test_uri is
    # extracted before this call
    obfuscated_uri = get_obfuscated_url(url_obj)

    log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)

    if not test_uri.endswith('info/refs'):
        test_uri = test_uri.rstrip('/') + '/info/refs'

    o = self._build_opener(url=url)
    o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

    q = {"service": 'git-upload-pack'}
    qs = f'?{urllib.parse.urlencode(q)}'
    cu = f"{test_uri}{qs}"

    try:
        req = urllib.request.Request(cu, None, {})
        log.debug("Trying to open URL %s", obfuscated_uri)
        resp = o.open(req)
        if resp.code != 200:
            raise exceptions.URLError()('Return Code is not 200')
    except Exception as e:
        log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
        # means it cannot be cloned
        raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")

    # now detect if it's proper git repo; always release the connection
    try:
        gitdata: bytes = resp.read()
    finally:
        resp.close()

    if b'service=git-upload-pack' in gitdata:
        pass
    elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
        # old style git can return some other format!
        pass
    else:
        e = None
        raise exceptions.URLError(e)(
            f"url [{obfuscated_uri}] does not look like a git repo org_exc: {e}")

    return True
@reraise_safe_exceptions | ||||
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
    """
    Fetch from ``url`` and store the selected remote refs in the repository.

    :param deferred: ref-name suffix marking refs that must be skipped
    :param valid_refs: ref-name prefix(es) that should be stored; a list is
        converted to a tuple so it can be used with ``startswith``
    :param update_after_clone: also set HEAD and build the index from it
    """
    # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
    remote_refs = self.pull(wire, url, apply_refs=False)
    repo = self._factory.repo(wire)

    wanted_prefixes = tuple(valid_refs) if isinstance(valid_refs, list) else valid_refs

    for ref_name in remote_refs:
        # only parse heads/tags and skip so called deferred tags
        if not ref_name.startswith(wanted_prefixes) or ref_name.endswith(deferred):
            continue
        repo[ref_name] = remote_refs[ref_name]

    if not update_after_clone:
        return

    # we want to checkout HEAD
    repo["HEAD"] = remote_refs["HEAD"]
    index.build_index_from_tree(repo.path, repo.index_path(),
                                repo.object_store, repo["HEAD"].tree)
@reraise_safe_exceptions | ||||
def branch(self, wire, commit_id):
    """
    Return the names of the branches whose head is exactly ``commit_id``.

    :return: list of branch names with the ``refs/heads/`` prefix removed
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _branch(_context_uid, _repo_id, _commit_id):
        head_ref = re.compile('^refs/heads')
        return [
            ref_name.split('refs/heads/')[-1]
            for ref_name, ref_sha in self.get_refs(wire).items()
            if head_ref.match(ref_name) and ref_sha == _commit_id
        ]

    return _branch(context_uid, repo_id, commit_id)
@reraise_safe_exceptions | ||||
def delete_branch(self, wire, branch_name):
    """Delete the branch ``branch_name``; do nothing when the lookup finds none."""
    with self._factory.repo_libgit2(wire) as repo:
        found_branch = repo.lookup_branch(branch_name)
        if found_branch:
            found_branch.delete()
@reraise_safe_exceptions | ||||
def commit_branches(self, wire, commit_id):
    """Return, as a list, the branches libgit2 reports for ``commit_id`` via ``branches.with_commit``."""
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _commit_branches(_context_uid, _repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return list(repo.branches.with_commit(_commit_id))

    return _commit_branches(context_uid, repo_id, commit_id)
@reraise_safe_exceptions | ||||
def add_object(self, wire, content):
    """
    Store ``content`` as a new blob in the repository's object store.

    The blob is built with dulwich, so the repository is opened in dulwich
    mode as well: only the dulwich repository object exposes the
    ``object_store`` this method writes to (the libgit2 repository used
    previously does not, so the call could never succeed).

    :param content: raw blob content
    :return: id of the stored blob, as reported by dulwich
    """
    repo = self._factory.repo(wire)
    blob = objects.Blob()
    blob.set_raw_string(content)
    repo.object_store.add_object(blob)
    return blob.id
@reraise_safe_exceptions | ||||
def create_commit(self, wire, author, committer, message, branch, new_tree_id,
                  date_args: list[int] | None = None,
                  parents: list | None = None):
    """
    Create a commit on ``refs/heads/<branch>`` using libgit2.

    :param author: author in the 'Name <email>' format
    :param committer: committer in the 'Name <email>' format
    :param message: the commit message
    :param branch: short branch name the commit is created on
    :param new_tree_id: tree of the commit; bytes/str ids are validated
        against the repository first
    :param date_args: optional ``[time, offset]`` pair applied to both
        signatures; when omitted the signatures are created without explicit
        time information instead of failing
    :param parents: optional parent commit ids; when omitted the branch head
        (or, failing that, HEAD if the repo has any local branch) is used
    :return: id of the new commit as a string
    :raises ValueError: when ``author`` or ``committer`` is malformed
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:

        # must exist even without date_args, it is unpacked unconditionally below
        signature_kw = {}
        if date_args:
            current_time, offset = date_args
            signature_kw = {
                'time': current_time,
                'offset': offset
            }
        author = create_signature_from_string(author, **signature_kw)
        committer = create_signature_from_string(committer, **signature_kw)

        tree = new_tree_id
        if isinstance(tree, (bytes, str)):
            # validate this tree is in the repo...
            tree = repo[safe_str(tree)].id

        if parents:
            # run via sha's and validate them in repo
            parents = [repo[c].id for c in parents]
        else:
            parents = []
            # ensure we COMMIT on top of given branch head
            # check if this repo has ANY branches, otherwise it's a new branch case we need to make
            if branch in repo.branches.local:
                parents += [repo.branches[branch].target]
            elif list(repo.branches.local):
                parents += [repo.head.target]
            # NOTE: when the repo has no local branches at all the commit is
            # created without parents; no branch is created on top of HEAD here

        # Create a new commit
        commit_oid = repo.create_commit(
            f'refs/heads/{branch}',  # the name of the reference to update
            author,  # the author of the commit
            committer,  # the committer of the commit
            message,  # the commit message
            tree,  # the tree produced by the index
            parents  # list of parents for the new commit, usually just one,
        )

        new_commit_id = safe_str(commit_oid)

    return new_commit_id
@reraise_safe_exceptions | ||||
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
    """
    Write file changes into the index, commit them and move ``branch`` to
    the new commit.

    :param commit_data: dict providing 'parents', 'author', 'committer',
        'message', 'commit_time' and 'commit_timezone'
    :param branch: short name of the branch to commit on
    :param commit_tree: when truthy (and parents are given) the index is
        first loaded from the tree of the first parent
    :param updated: entries with 'content', 'path' and 'mode' to add
    :param removed: paths to remove from the index
    :return: id of the created commit
    """
    known_modes = {
        0o100644: pygit2.GIT_FILEMODE_BLOB,
        0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
        0o120000: pygit2.GIT_FILEMODE_LINK,
    }

    def mode2pygit(mode):
        """
        git only supports two filemode 644 and 755

        0o100755 -> 33261
        0o100644 -> 33188
        """
        # anything unknown is stored as a regular blob
        return known_modes.get(mode) or pygit2.GIT_FILEMODE_BLOB

    with self._factory.repo_libgit2(wire) as repo:
        repo_index = repo.index

        commit_parents = None
        if commit_tree and commit_data['parents']:
            commit_parents = commit_data['parents']
            first_parent = repo[commit_parents[0]]
            repo_index.read_tree(first_parent.tree)

        for node in updated:
            blob_id = repo.create_blob(node['content'])
            entry = pygit2.IndexEntry(node['path'], blob_id, mode2pygit(node['mode']))
            repo_index.add(entry)

        for removed_path in removed:
            repo_index.remove(removed_path)

        # Write changes to the index
        repo_index.write()

        # Create a tree from the updated index
        new_tree_id = repo_index.write_tree()

    date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]

    new_commit_id = self.create_commit(
        wire, commit_data['author'], commit_data['committer'], commit_data['message'],
        branch, new_tree_id, date_args=date_args, parents=commit_parents)

    # libgit2, ensure the branch is there and exists
    self.create_branch(wire, branch, new_commit_id)

    # libgit2, set new ref to this created commit
    self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)

    return new_commit_id
@reraise_safe_exceptions | ||||
def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
    """
    Fetch objects from ``url`` into the repository using dulwich.

    :param url: remote location; anything other than 'default' that has no
        '://' is handled by ``LocalGitClient``, everything else by
        ``HttpGitClient`` with an opener from ``_build_opener``
    :param apply_refs: when true, store the fetched refs in the repository
        (refs ending with the peeled marker are skipped)
    :param refs: optional ref names restricting what is fetched and returned
    :param update_after: when truthy, point HEAD at the remote HEAD and
        rebuild the index from it
    :return: the remote refs (the ``refs`` attribute when the fetch result
        is still a ``FetchPackResult``)
    :raises: ``exceptions.AbortException`` when the remote is not a Git
        repository
    """
    if url != 'default' and '://' not in url:
        client = LocalGitClient(url)
    else:
        url_obj = url_parser(safe_bytes(url))
        o = self._build_opener(url)
        # from here on ``url`` is the first element of authinfo()
        url = url_obj.authinfo()[0]
        client = HttpGitClient(base_url=url, opener=o)
    repo = self._factory.repo(wire)

    # default: want everything the remote offers
    determine_wants = repo.object_store.determine_wants_all

    if refs:
        refs: list[bytes] = [ascii_bytes(x) for x in refs]

        def determine_wants_requested(_remote_refs):
            # collect the hashes of exactly those remote refs that were requested
            determined = []
            for ref_name, ref_hash in _remote_refs.items():
                bytes_ref_name = safe_bytes(ref_name)

                if bytes_ref_name in refs:
                    bytes_ref_hash = safe_bytes(ref_hash)
                    determined.append(bytes_ref_hash)
            return determined

        # swap with our custom requested wants
        determine_wants = determine_wants_requested

    try:
        remote_refs = client.fetch(
            path=url, target=repo, determine_wants=determine_wants)

    except NotGitRepository as e:
        log.warning(
            'Trying to fetch from "%s" failed, not a Git repository.', url)
        # Exception can contain unicode which we convert
        raise exceptions.AbortException(e)(repr(e))

    # mikhail: client.fetch() returns all the remote refs, but fetches only
    # refs filtered by `determine_wants` function. We need to filter result
    # as well
    if refs:
        # NOTE(review): iterates/indexes the fetch result like a mapping of
        # ref name -> sha; confirm this holds for the dulwich version in use
        remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

    if apply_refs:
        # TODO: johbo: Needs proper test coverage with a git repository
        # that contains a tag object, so that we would end up with
        # a peeled ref at this point.
        for k in remote_refs:
            if k.endswith(PEELED_REF_MARKER):
                log.debug("Skipping peeled reference %s", k)
                continue
            repo[k] = remote_refs[k]

        if refs and not update_after:
            # update to ref
            # mikhail: explicitly set the head to the last ref.
            update_to_ref = refs[-1]
            # NOTE(review): ``update_after`` is falsy in this branch, so this
            # check can only match an empty string — confirm the intent
            if isinstance(update_after, str):
                update_to_ref = update_after

            repo[HEAD_MARKER] = remote_refs[update_to_ref]

    if update_after:
        # we want to check out HEAD
        repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
        index.build_index_from_tree(repo.path, repo.index_path(),
                                    repo.object_store, repo[HEAD_MARKER].tree)

    # unwrap the dulwich result object when it was not replaced by the
    # filtered dict above
    if isinstance(remote_refs, FetchPackResult):
        return remote_refs.refs
    return remote_refs
@reraise_safe_exceptions | ||||
def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
    """
    Mirror refs from ``url`` into the repository using the git CLI.

    The remote is listed with ``git ls-remote`` and the selected refs are
    then fetched in chunks of 128 with ``git fetch --force --prune``.

    :param url: remote to fetch from
    :param refs: optional commit id (or list/tuple of them); when given only
        remote refs pointing at one of these ids are fetched. Ids may be
        given as ``str`` or ``bytes``.
    :param all_refs: list every remote ref instead of only heads and tags
    :param kwargs: ``sync_large_objects`` (truthy) additionally runs
        ``git lfs fetch --all``
    :return: ordered dict of remote ref name -> sha (both bytes), without
        HEAD and peeled refs
    """
    self._factory.repo(wire)
    if refs and not isinstance(refs, (list, tuple)):
        refs = [refs]

    # ls-remote output is bytes, so compare on bytes regardless of how the
    # caller spelled the wanted ids
    wanted_shas = {safe_bytes(x) for x in refs} if refs else None

    config = self._wire_to_config(wire)
    # get all remote refs we'll use to fetch later
    cmd = ['ls-remote']
    if not all_refs:
        cmd += ['--heads', '--tags']
    cmd += [url]
    output, __ = self.run_git_command(
        wire, cmd, fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    remote_refs = collections.OrderedDict()
    fetch_refs = []

    for ref_line in output.splitlines():
        sha, separator, ref = ref_line.partition(b'\t')
        if not separator:
            # not a "<sha>\t<ref>" line (e.g. blank), nothing to record
            log.debug("Skipping unparsable ls-remote line %s", ref_line)
            continue
        sha = sha.strip()
        if ref in remote_refs:
            # duplicate, skip
            continue
        if ref.endswith(PEELED_REF_MARKER):
            log.debug("Skipping peeled reference %s", ref)
            continue
        # don't sync HEAD
        if ref == HEAD_MARKER:
            continue

        remote_refs[ref] = sha

        # we filter fetch using our specified refs, if there are any
        if wanted_shas is None or sha in wanted_shas:
            fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
    log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))

    if fetch_refs:
        # chunked to keep the size of each command line bounded
        for chunk in more_itertools.chunked(fetch_refs, 128):
            fetch_refs_chunks = list(chunk)
            log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
            self.run_git_command(
                wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                fail_on_stderr=False,
                _copts=self._remote_conf(config),
                extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if kwargs.get('sync_large_objects'):
        self.run_git_command(
            wire, ['lfs', 'fetch', url, '--all'],
            fail_on_stderr=False,
            _copts=self._remote_conf(config),
        )

    return remote_refs
@reraise_safe_exceptions | ||||
def sync_push(self, wire, url, refs=None, **kwargs):
    """
    Mirror-push the repository to ``url`` using the git CLI.

    Nothing is pushed when ``check_url`` returns a falsy value.

    :param refs: accepted but not used by this method
    :param kwargs: ``sync_large_objects`` (truthy) additionally runs
        ``git lfs push --all``
    """
    if not self.check_url(url, wire):
        return

    config = self._wire_to_config(wire)
    self._factory.repo(wire)

    mirror_cmd = ['push', url, '--mirror']
    self.run_git_command(
        wire, mirror_cmd,
        fail_on_stderr=False,
        _copts=self._remote_conf(config),
        extra_env={'GIT_TERMINAL_PROMPT': '0'})

    if not kwargs.get('sync_large_objects'):
        return

    lfs_cmd = ['lfs', 'push', url, '--all']
    self.run_git_command(
        wire, lfs_cmd,
        fail_on_stderr=False,
        _copts=self._remote_conf(config),
    )
r1145 | ||||
@reraise_safe_exceptions | ||||
def get_remote_refs(self, wire, url):
    """Open the repository at ``url`` with dulwich and return its refs (``wire`` is not used)."""
    remote_repo = Repo(url)
    all_refs = remote_repo.get_refs()
    return all_refs
@reraise_safe_exceptions | ||||
def get_description(self, wire):
    """Return the description reported by the dulwich repository object."""
    return self._factory.repo(wire).get_description()
@reraise_safe_exceptions | ||||
def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
    """
    Return ids of the commits reachable from ``rev2`` but not from ``rev1``.

    Objects are first exchanged in both directions between the repository
    at ``wire['path']`` and the one at ``other_repo_path``; the commit walk
    itself runs on the other repository.
    """
    origin_repo_path = wire['path']
    origin_repo = self._factory.repo(wire)

    # fetch from other_repo_path to our origin repo
    LocalGitClient(thin_packs=False).fetch(other_repo_path, origin_repo)

    wire_remote = wire.copy()
    wire_remote['path'] = other_repo_path
    other_repo = self._factory.repo(wire_remote)

    # fetch from origin_repo_path to our remote repo
    LocalGitClient(thin_packs=False).fetch(origin_repo_path, other_repo)

    walker = other_repo.get_walker(
        include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])
    return [entry.commit.id for entry in walker]
@reraise_safe_exceptions | ||||
def get_object(self, wire, sha, maybe_unreachable=False):
    """
    Resolve ``sha`` (anything ``revparse_single`` understands) to an object.

    Tags are dereferenced to their target. A plain commit id that is not
    contained in any branch is treated as missing unless
    ``maybe_unreachable`` is set.

    :return: dict with 'id', 'type', 'commit_id' and 'idx' (always 0)
    :raises: ``exceptions.LookupException`` when ``sha`` cannot be resolved
        or fails the dangling-commit check
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    # NOTE(review): ``maybe_unreachable`` is read from the enclosing scope and
    # is not one of the cached function's arguments — confirm that cached
    # results may be shared between both values of the flag
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_object(_context_uid, _repo_id, _sha):
        repo_init = self._factory.repo_libgit2(wire)
        with repo_init as repo:

            missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
            try:
                commit = repo.revparse_single(sha)
            except KeyError:
                # NOTE(marcink): KeyError doesn't give us any meaningful information
                # here, we instead give something more explicit
                e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                raise exceptions.LookupException(e)(missing_commit_err)
            except ValueError as e:
                raise exceptions.LookupException(e)(missing_commit_err)

            is_tag = False
            if isinstance(commit, pygit2.Tag):
                # work with the object the tag points at
                commit = repo.get(commit.target)
                is_tag = True

            # the dangling check only runs for non-tag objects that were
            # addressed by their full id and must be reachable
            check_dangling = True
            if is_tag:
                check_dangling = False

            if check_dangling and maybe_unreachable:
                check_dangling = False

            # we used a reference and it parsed means we're not having a dangling commit
            if sha != commit.hex:
                check_dangling = False

            if check_dangling:
                # check for dangling commit
                for branch in repo.branches.with_commit(commit.hex):
                    if branch:
                        break
                else:
                    # reached only when no truthy branch contained the commit
                    # NOTE(marcink): Empty error doesn't give us any meaningful information
                    # here, we instead give something more explicit
                    e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                    raise exceptions.LookupException(e)(missing_commit_err)

            commit_id = commit.hex
            type_str = commit.type_str
            return {
                'id': commit_id,
                'type': type_str,
                'commit_id': commit_id,
                'idx': 0
            }

    return _get_object(context_uid, repo_id, sha)
@reraise_safe_exceptions
def get_refs(self, wire):
    """
    Return a dict mapping reference name to target hex id for every
    reference whose name matches `^refs/(heads|tags)/`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_refs(_context_uid, _repo_id):
        with self._factory.repo_libgit2(wire) as repo:
            wanted = re.compile('^refs/(heads|tags)/')
            matching = [
                ref for ref in repo.listall_reference_objects()
                if wanted.match(ref.name)
            ]
            refs = {}
            for ref in matching:
                refs[ref.name] = ref.target.hex
            return refs

    return _get_refs(context_uid, repo_id)
@reraise_safe_exceptions
def get_branch_pointers(self, wire):
    """
    Return a dict mapping target hex id to the reference shorthand for
    every reference whose name matches `^refs/heads`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_branch_pointers(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        heads_re = re.compile('^refs/heads')
        with repo_init as repo:
            pointers = {}
            for ref in [r for r in repo.listall_reference_objects() if heads_re.match(r.name)]:
                # several branches on the same commit: the last one listed wins
                pointers[ref.target.hex] = ref.shorthand
            return pointers

    return _get_branch_pointers(context_uid, repo_id)
@reraise_safe_exceptions
def head(self, wire, show_exc=True):
    """
    Return the hex id of the object HEAD peels to.

    When resolving HEAD fails, the error is re-raised if `show_exc` is
    true; otherwise `None` is returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _head(_context_uid, _repo_id, _show_exc):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                return repo.head.peel().hex
            except Exception:
                if not show_exc:
                    # caller asked for a silent failure
                    return None
                raise

    return _head(context_uid, repo_id, show_exc)
@reraise_safe_exceptions
def init(self, wire):
    """
    Create the directory `wire['path']` (mode 0o755) and initialise a
    non-bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=False)
r1145 | ||||
@reraise_safe_exceptions
def init_bare(self, wire):
    """
    Create the directory `wire['path']` (mode 0o755) and initialise a
    bare git repository in it.
    """
    target_dir = safe_str(wire['path'])
    os.makedirs(target_dir, mode=0o755)
    pygit2.init_repository(target_dir, bare=True)
r1145 | ||||
@reraise_safe_exceptions
def revision(self, wire, rev):
    """
    Look up `rev` in the repository and return a dict with its `id`
    and, when the object exposes a `tree_id`, its `tree`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _revision(_context_uid, _repo_id, _rev):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[rev]
            info = {'id': obj.id.hex}
            # tree objects themselves don't have a tree_id attribute
            if hasattr(obj, 'tree_id'):
                info.update({'tree': obj.tree_id.hex})
            return info

    return _revision(context_uid, repo_id, rev)
@reraise_safe_exceptions
def date(self, wire, commit_id):
    """
    Return `[commit_time, commit_time_offset]` for `commit_id`.

    Objects that do not expose `commit_time` themselves are resolved
    through their `get_object()` first.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _date(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'commit_time'):
                obj = obj.get_object()
            # TODO(marcink): check dulwich difference of offset vs timezone
            return [obj.commit_time, obj.commit_time_offset]

    return _date(repo_id, commit_id)
@reraise_safe_exceptions
def author(self, wire, commit_id):
    """
    Return the author of `commit_id` as `Name <email>`, or just the
    name when no email is set.

    If formatting the name fails, the raw name passed through `safe_str`
    is returned instead.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _author(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            # objects without an `author` are resolved via get_object() first
            author = obj.author if hasattr(obj, 'author') else obj.get_object().author

            if author.email:
                return f"{author.name} <{author.email}>"

            try:
                return f"{author.name}"
            except Exception:
                return f"{safe_str(author.raw_name)}"

    return _author(repo_id, commit_id)
@reraise_safe_exceptions
def message(self, wire, commit_id):
    """
    Return the `message` attribute of the object stored under `commit_id`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _message(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            return repo[commit_id].message

    return _message(repo_id, commit_id)
@reraise_safe_exceptions
def parents(self, wire, commit_id):
    """
    Return the list of parent hex ids of `commit_id`.

    Objects that do not expose `parent_ids` themselves are resolved
    through their `get_object()` first.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _parents(_repo_id, _commit_id):
        with self._factory.repo_libgit2(wire) as repo:
            obj = repo[commit_id]
            if not hasattr(obj, 'parent_ids'):
                obj = obj.get_object()
            return [parent.hex for parent in obj.parent_ids]

    return _parents(repo_id, commit_id)
@reraise_safe_exceptions
def children(self, wire, commit_id):
    """
    Return the ids of the children of `commit_id`.

    Runs `git rev-list --all --children <commit_id>^..<head>` and takes
    the ids listed after `commit_id` on the first output line starting
    with it. Returns an empty list when no such line exists.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)
    head = self.head(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _children(_repo_id, _commit_id):
        rev_range = f'{commit_id}^..{head}'
        output, __ = self.run_git_command(
            wire, ['rev-list', '--all', '--children', rev_range])

        starts_with_commit = re.compile(fr'^{commit_id}')
        for raw_line in output.splitlines():
            text = safe_str(raw_line)
            if starts_with_commit.match(text):
                # line format: "<commit> <child> <child> ..."
                return list(text.split(' ')[1:])
        return []

    return _children(repo_id, commit_id)
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
    """
    Create the reference `key` pointing at `value`, overwriting any
    existing reference with that name (`force=True`).
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.create(key, value, force=True)
@reraise_safe_exceptions
def update_refs(self, wire, key, value):
    """
    Point the existing reference `key` at `value`.

    Unlike `set_refs`, this raises `ValueError` when `key` is not already
    present in the repository references.
    """
    with self._factory.repo_libgit2(wire) as repo:
        known_refs = repo.references
        if key not in known_refs:
            raise ValueError(f'Reference {key} not found in the repository')
        repo.references.create(key, value, force=True)
@reraise_safe_exceptions
def create_branch(self, wire, branch_name, commit_id, force=False):
    """
    Create the local branch `branch_name`.

    :param wire: connection dict used to open the repository
    :param branch_name: name of the local branch to create
    :param commit_id: commit the branch should point at; when empty, the
        commit HEAD currently resolves to is used
    :param force: when True, create the branch even if it already exists;
        when False, an existing branch is left untouched
    """
    repo_init = self._factory.repo_libgit2(wire)
    with repo_init as repo:
        if commit_id:
            commit = repo[commit_id]
        else:
            # If no commit is given just use the HEAD. `repo.head` is a
            # property returning a reference (it is not callable), so peel it
            # to the commit it points at, same as `head()` does.
            commit = repo.head.peel()

        # without `force`, create only if that branch isn't existing yet
        if force or not repo.branches.get(branch_name):
            repo.branches.local.create(branch_name, commit, force=force)
@reraise_safe_exceptions
def remove_ref(self, wire, key):
    """
    Delete the reference named `key` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        repo.references.delete(key)
@reraise_safe_exceptions
def tag_remove(self, wire, tag_name):
    """
    Delete the reference `refs/tags/<tag_name>` from the repository.
    """
    with self._factory.repo_libgit2(wire) as repo:
        tag_ref = f'refs/tags/{tag_name}'
        repo.references.delete(tag_ref)
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
    """
    Compare the trees of two commits and return three lists of paths:
    `(added, modified, deleted)`.

    `source_id` may be empty, in which case the target tree is compared
    against no tree at all.
    """
    repo = self._factory.repo(wire)

    # source can be empty
    source_id = safe_bytes(source_id if source_id else b'')
    target_id = safe_bytes(target_id)

    source = repo[source_id].tree if source_id else None
    target = repo[target_id].tree
    changes = list(repo.object_store.tree_changes(source, target))

    added, modified, deleted = set(), set(), set()
    for (old_path, new_path), (_, _), (_, _) in changes:
        if new_path:
            # a path on both sides is a modification, otherwise an addition
            bucket = modified if old_path else added
            bucket.add(new_path)
        elif old_path:
            deleted.add(old_path)

    return list(added), list(modified), list(deleted)
@reraise_safe_exceptions
def tree_and_type_for_path(self, wire, commit_id, path):
    """
    Return `(hex id, type string, filemode)` of the tree entry at `path`
    in `commit_id`, or `(None, None, None)` when the lookup raises
    `KeyError`.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]
            try:
                entry = commit.tree[path]
            except KeyError:
                return None, None, None
            else:
                return entry.id.hex, entry.type_str, entry.filemode

    return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
    """
    List the entries of the tree `tree_id` as tuples of
    `(name, filemode, hex id, type)`.

    Entries of type `commit` are reported as `link`. Raises
    `ObjectMissing` when `tree_id` is not found in the repository.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _tree_items(_repo_id, _tree_id):
        with self._factory.repo_libgit2(wire) as repo:
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            entries = []
            for entry in tree:
                entry_sha = entry.hex
                entry_mode = entry.filemode
                # NOTE(marcink): submodules we translate to 'link' for backward compat
                entry_type = 'link' if entry.type_str == 'commit' else entry.type_str
                entries.append((entry.name, entry_mode, entry_sha, entry_type))
            return entries

    return _tree_items(repo_id, tree_id)
@reraise_safe_exceptions
def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Old version that uses subprocess to call diff

    :param wire: connection dict passed on to `run_git_command`
    :param commit_id_1: source commit; when equal to `self.EMPTY_COMMIT`
        `git show` of `commit_id_2` is used instead of `git diff`
    :param commit_id_2: target commit
    :param file_filter: optional path to limit the diff to
    :param opt_ignorews: when true, pass `--ignore-all-space`
    :param context: number of context lines (`-U<context>`)
    :return: the diff output of the git command as bytes
    """
    flags = [
        f'-U{context}', '--patch',
        '--binary',
        '--find-renames',
        '--no-indent-heuristic',
        # '--indent-heuristic',
        #'--full-index',
        #'--abbrev=40'
    ]

    if opt_ignorews:
        flags.append('--ignore-all-space')

    against_empty = commit_id_1 == self.EMPTY_COMMIT
    if against_empty:
        cmd = ['show'] + flags + [commit_id_2]
    else:
        cmd = ['diff'] + flags + [commit_id_1, commit_id_2]

    if file_filter:
        cmd.extend(['--', file_filter])

    diff, __ = self.run_git_command(wire, cmd)
    # If we used 'show' command, strip first few lines (until actual diff
    # starts)
    if against_empty:
        lines = diff.splitlines()
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
            len(lines))
        # The command output is bytes, so it has to be re-joined with a bytes
        # separator. Append new line just like 'diff' command do
        diff = b'\n'.join(lines[start:]) + b'\n'
    return diff
@reraise_safe_exceptions
def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
    """
    Compute a diff between two commits with pygit2 and return it wrapped
    in a `BytesEnvelope`.

    When `commit_id_1` equals `self.EMPTY_COMMIT` the tree of
    `commit_id_2` is diffed against no tree. With `file_filter` set, only
    the patch whose old file path equals the filter is returned, or an
    empty envelope when there is none.
    """
    with self._factory.repo_libgit2(wire) as repo:
        flags = 0 | pygit2.GIT_DIFF_SHOW_BINARY
        if opt_ignorews:
            flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

        diff_opts = {'flags': flags, 'context_lines': context, 'swap': True}
        target_commit = repo[commit_id_2]
        if commit_id_1 == self.EMPTY_COMMIT:
            diff_obj = target_commit.tree.diff_to_tree(**diff_opts)
        else:
            source_commit = repo[commit_id_1]
            diff_obj = target_commit.tree.diff_to_tree(source_commit.tree, **diff_opts)

        diff_obj.find_similar(flags=0 | pygit2.GIT_DIFF_FIND_RENAMES)

        if not file_filter:
            return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')

        for patch in diff_obj:
            if patch.delta.old_file.path == file_filter:
                return BytesEnvelope(patch.data) or BytesEnvelope(b'')
        # no matching path == no diff
        return BytesEnvelope(b'')
@reraise_safe_exceptions
def node_history(self, wire, commit_id, path, limit):
    """
    Return the commit ids (as bytes) found in the git output listing the
    history of `path`, starting from `commit_id`.

    A truthy `limit` caps the number of entries requested from git.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
        if limit == 1:
            # optimize for n==1, rev-list is much faster for that use-case
            cmd = ['rev-list', '-1', commit_id, '--', path]
        else:
            limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
            cmd = ['log', *limit_args, '--pretty=format: %H', '-s', commit_id, '--', path]

        output, __ = self.run_git_command(wire, cmd)
        return list(re.findall(rb'[0-9a-fA-F]{40}', output))

    return _node_history(context_uid, repo_id, commit_id, path, limit)
@reraise_safe_exceptions
def node_annotate_legacy(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` by parsing `git blame` output.

    Returns a list of `(line_no, blame_commit_id, line)` tuples built
    from every output line except the last one.
    """
    # note: replaced by pygit2 implementation
    # -l ==> outputs long shas (and we need all 40 characters)
    # --root ==> doesn't put '^' character for boundaries
    # -r commit_id ==> blames for the given commit
    cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
    output, __ = self.run_git_command(wire, cmd)

    annotated = []
    for line_no, blame_line in enumerate(output.splitlines()[:-1], start=1):
        # each blame line is "<commit id> <rest of the line>"
        blame_commit_id, line = re.split(rb' ', blame_line, 1)
        annotated.append((line_no, blame_commit_id, line))
    return annotated
@reraise_safe_exceptions
def node_annotate(self, wire, commit_id, path):
    """
    Annotate `path` at `commit_id` using pygit2 blame.

    Returns a `BinaryEnvelope` wrapping a list of
    `(line_no, blame_commit_id, line)` tuples, one per line of the file
    content as split by `splitnewlines`.
    """
    annotated = []
    with self._factory.repo_libgit2(wire) as repo:
        commit = repo[commit_id]
        blame_obj = repo.blame(path, newest_commit=commit_id)
        file_content = commit.tree[path].data

        # blame hunks are looked up with 1-based line numbers
        for line_no, line in enumerate(splitnewlines(file_content), start=1):
            hunk = blame_obj.for_line(line_no)
            annotated.append((line_no, hunk.final_commit_id.hex, line))

    return BinaryEnvelope(annotated)
@reraise_safe_exceptions
def update_server_info(self, wire, force=False):
    """
    Run `git update-server-info` (with `--force` when `force` is true)
    and return its output split into lines.
    """
    cmd = ['update-server-info', '--force'] if force else ['update-server-info']
    output, __ = self.run_git_command(wire, cmd)
    return output.splitlines()
r1145 | ||||
@reraise_safe_exceptions
def get_all_commit_ids(self, wire):
    """
    Return the output lines of
    `git rev-list --reverse --date-order --branches --tags`.

    Any exception raised while running the command results in an empty
    list being returned.
    """
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids(_context_uid, _repo_id):
        cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
        try:
            output, __ = self.run_git_command(wire, cmd)
            return output.splitlines()
        except Exception:
            # Can be raised for empty repositories
            return []

    # NOTE(review): this pygit2 variant is defined but never called in this
    # method, and it has no return statement, so `results` is discarded.
    # It also passes GIT_BRANCH_ALL together with the GIT_SORT_* flags as the
    # sort mode of `repo.walk` - presumably unintended, confirm before using.
    @region.conditional_cache_on_arguments(condition=cache_on)
    def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
        repo_init = self._factory.repo_libgit2(wire)
        from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
        results = []
        with repo_init as repo:
            for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
                results.append(commit.id.hex)

    return _get_all_commit_ids(context_uid, repo_id)
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
    """
    Run the git executable with `cmd` as arguments and return a tuple of
    `(stdout, stderr)`, both joined into bytes.

    Special keys consumed from `opts` (they are not passed on to the
    subprocess call):

    * `_bare`: when present, the default `-c` config options are not added
    * `_safe`: when present, an `OSError` does not raise; `('', err)` is
      returned instead
    * `_copts`: extra options inserted before the git sub-command
    * `extra_env`: mapping merged into the environment of the call

    All remaining `opts` are handed to `subprocessio.SubprocessIOChunker`.

    :raises: an exception built by `exceptions.VcsException` when the
        command fails with `OSError` and `_safe` was not given
    """
    path = wire.get('path', None)
    debug_mode = vcsserver.ConfigGet().get_bool('debug')

    # run inside the repository directory when it exists
    if path and os.path.isdir(path):
        opts['cwd'] = path

    if '_bare' in opts:
        _copts = []
        del opts['_bare']
    else:
        _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
    safe_call = False
    if '_safe' in opts:
        # no exc on failure
        del opts['_safe']
        safe_call = True

    if '_copts' in opts:
        _copts.extend(opts['_copts'] or [])
        del opts['_copts']

    # start from a copy of the process environment, never mutate os.environ
    gitenv = os.environ.copy()
    gitenv.update(opts.pop('extra_env', {}))
    # need to clean fix GIT_DIR !
    if 'GIT_DIR' in gitenv:
        del gitenv['GIT_DIR']
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
    gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

    cmd = [settings.GIT_EXECUTABLE()] + _copts + cmd
    _opts = {'env': gitenv, 'shell': False}

    proc = None
    try:
        # caller supplied options override the defaults above
        _opts.update(opts)
        proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

        return b''.join(proc), b''.join(proc.stderr)
    except OSError as err:
        cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
        # call options (which include the environment) are only reported
        # when the server runs in debug mode
        call_opts = {}
        if debug_mode:
            call_opts = _opts

        tb_err = ("Couldn't run git command ({}).\n"
                  "Original error was:{}\n"
                  "Call options:{}\n"
                  .format(cmd, err, call_opts))
        log.exception(tb_err)
        if safe_call:
            # NOTE(review): this returns a str and the exception object,
            # while the success path returns two bytes values
            return '', err
        else:
            raise exceptions.VcsException()(tb_err)
    finally:
        if proc:
            proc.close()
@reraise_safe_exceptions
def install_hooks(self, wire, force=False):
    """
    Install the git hooks into the repository at `wire['path']`.

    :param wire: connection dict; `wire['path']` is the repository path
    :param force: passed to `install_git_hooks` as `force_create`
    :return: whatever `install_git_hooks` returns
    """
    from vcsserver.hook_utils import install_git_hooks

    bare = self.bare(wire)
    path = wire['path']
    # NOTE(review): this method used to compute
    # os.path.join(settings.BINARY_DIR, 'python3') and throw the result away.
    # The no-op was removed; if a custom interpreter was meant to be handed
    # to `install_git_hooks`, it has to be wired up here explicitly.
    return install_git_hooks(path, bare, force_create=force)
@reraise_safe_exceptions
def get_hooks_info(self, wire):
    """
    Return a dict with the `pre_version` and `post_version` of the git
    hooks of the repository at `wire['path']`.
    """
    from vcsserver.hook_utils import (
        get_git_pre_hook_version, get_git_post_hook_version)

    is_bare = self.bare(wire)
    repo_path = wire['path']

    pre_version = get_git_pre_hook_version(repo_path, is_bare)
    post_version = get_git_post_hook_version(repo_path, is_bare)
    return {
        'pre_version': pre_version,
        'post_version': post_version,
    }
@reraise_safe_exceptions
def set_head_ref(self, wire, head_name):
    """
    Point HEAD at `refs/heads/<head_name>` and return a two element list:
    the head name and a message describing the change.
    """
    log.debug('Setting refs/head to `%s`', head_name)
    with self._factory.repo_libgit2(wire) as repo:
        repo.set_head(f'refs/heads/{head_name}')

    return [head_name, f'set HEAD to refs/heads/{head_name}']
@reraise_safe_exceptions
def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
                 archive_dir_name, commit_id, cache_config):
    """
    Build an archive of `commit_id` through `store_archive_in_cache`,
    handing it a generator that yields one `ArchiveNode` per file.
    """

    def file_walker(_commit_id, path):
        # Yields an ArchiveNode for every index entry of the tree at `path`;
        # entries with the commit filemode are skipped.
        with self._factory.repo_libgit2(wire) as repo:
            commit = repo[commit_id]

            at_root = path in ['', '/']
            tree = commit.tree if at_root else commit.tree[path.rstrip('/')]
            tree_id = tree.id.hex
            try:
                tree = repo[tree_id]
            except KeyError:
                raise ObjectMissing(f'No tree with id: {tree_id}')

            tree_index = LibGit2Index.Index()
            tree_index.read_tree(tree)

            for entry in tree_index:
                entry_path = entry.path
                entry_mode = entry.mode
                is_link = stat.S_ISLNK(entry_mode)
                if entry_mode == pygit2.GIT_FILEMODE_COMMIT:
                    log.debug('Skipping path %s as a commit node', entry_path)
                    continue
                yield ArchiveNode(entry_path, entry_mode, is_link, repo[entry.hex].read_raw)

    return store_archive_in_cache(
        file_walker, archive_name_key, kind, mtime, archive_at_path,
        archive_dir_name, commit_id, cache_config=cache_config)