commit.py
450 lines
| 14.8 KiB
| text/x-python
|
PythonLexer
# Copyright (C) 2014-2024 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
"""
GIT commit module
"""
r5126 | import io | |||
r4927 | import configparser | |||
r5647 | import logging | |||
r1 | from itertools import chain | |||
from zope.cachedescriptors.property import Lazy as LazyProperty | ||||
r154 | from rhodecode.lib.datelib import utcdate_fromtimestamp | |||
r5074 | from rhodecode.lib.str_utils import safe_bytes, safe_str | |||
r1 | from rhodecode.lib.vcs.backends import base | |||
from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError | ||||
from rhodecode.lib.vcs.nodes import ( | ||||
r5647 | FileNode, | |||
DirNode, | ||||
NodeKind, | ||||
RootNode, | ||||
SubModuleNode, | ||||
LargeFileNode, | ||||
) | ||||
from rhodecode.lib.vcs_common import FILEMODE_LINK | ||||
# module-level logger, named after this module
log = logging.getLogger(__name__)
r1 | ||||
class GitCommit(base.BaseCommit):
    """
    Represents state of the repository at single commit id.
    """

    # Attribute names listed here are removed from ``pre_load`` by
    # ``_set_bulk_properties`` before the bulk remote request is issued.
    _filter_pre_load = [
        # done through a more complex tree walk on parents
        "affected_files",
        # done through subprocess not remote call
        "children",
        # done through a more complex tree walk on parents
        "status",
        # mercurial specific property not supported here
        "obsolete",
        # mercurial specific property not supported here
        "phase",
        # mercurial specific property not supported here
        "hidden",
    ]
def __init__(self, repository, raw_id, idx, pre_load=None):
    """
    Bind this commit to ``repository`` and optionally bulk-load attributes.

    :param repository: owning repository; its ``_remote`` is reused here
    :param raw_id: commit id, passed through ``safe_str`` before storing
    :param idx: stored unchanged as ``self.idx``
    :param pre_load: optional attribute names handed to
        ``_set_bulk_properties``
    """
    self.repository, self._remote = repository, repository._remote
    # TODO: johbo: Tweak of raw_id should not be necessary
    self.raw_id = safe_str(raw_id)
    self.idx = idx

    self._set_bulk_properties(pre_load)

    # per-instance caches
    self.nodes = dict()
    # path stats cache, e.g filemode etc
    self._path_mode_cache = dict()
    # path type dir/file/link etc cache
    self._path_type_cache = dict()
    self._submodules = None
def _set_bulk_properties(self, pre_load):
    """
    Fetch the requested attributes in one remote call and store them on
    the instance ``__dict__``.

    Names listed in ``_filter_pre_load`` are skipped; nothing is requested
    when no names remain.

    :param pre_load: iterable of attribute names, or a falsy value
    """
    wanted = [name for name in (pre_load or ()) if name not in self._filter_pre_load]
    if not wanted:
        return

    fetched = self._remote.bulk_request(self.raw_id, wanted)
    for name, raw in fetched.items():
        if name in ("author", "message"):
            # falsy values are stored untouched
            converted = safe_str(raw) if raw else raw
        elif name == "date":
            converted = utcdate_fromtimestamp(*raw)
        elif name == "parents":
            converted = self._make_commits(raw)
        elif name == "branch":
            converted = self._set_branch(raw)
        else:
            converted = raw
        self.__dict__[name] = converted
@LazyProperty
def _commit(self):
    """Return the remote object stored under this commit's ``raw_id``."""
    remote = self._remote
    return remote[self.raw_id]
@LazyProperty
def _tree_id(self):
    """Return the ``id`` of the remote object referenced by this commit's ``tree`` entry."""
    tree_ref = self._commit["tree"]
    tree_obj = self._remote[tree_ref]
    return tree_obj["id"]
r1 | ||||
@LazyProperty
def id(self):
    """Return the commit id; same value as ``raw_id``."""
    commit_id = self.raw_id
    return commit_id
@LazyProperty
def short_id(self):
    """Return the first 12 characters of ``raw_id``."""
    prefix_length = 12
    return self.raw_id[:prefix_length]
@LazyProperty
def message(self):
    """Return the remote commit message passed through ``safe_str``."""
    raw_message = self._remote.message(self.id)
    return safe_str(raw_message)
r1 | ||||
@LazyProperty
def committer(self):
    """
    Return the committer passed through ``safe_str``.

    NOTE(review): this issues the same remote ``author()`` call as the
    ``author`` property - confirm that no separate committer lookup is
    intended.
    """
    raw_committer = self._remote.author(self.id)
    return safe_str(raw_committer)
r1 | ||||
@LazyProperty
def author(self):
    """Return the remote author value passed through ``safe_str``."""
    raw_author = self._remote.author(self.id)
    return safe_str(raw_author)
r1 | ||||
@LazyProperty
def date(self):
    """
    Return the commit date: the two values reported by the remote are
    converted with ``utcdate_fromtimestamp``.
    """
    remote_date = self._remote.date(self.raw_id)
    stamp, zone = remote_date
    return utcdate_fromtimestamp(stamp, zone)
r1 | ||||
@LazyProperty
def status(self):
    """
    Returns modified, added and deleted files for current commit, as
    three lists in that order.
    """
    added, modified, deleted = self._changes_cache
    # second slot carries the added paths; it used to repeat ``modified``,
    # which silently dropped every added file from the result
    return list(modified), list(added), list(deleted)
r1 | ||||
@LazyProperty
def tags(self):
    """Return names of repository tags whose commit id equals ``raw_id``."""
    matching = []
    for name, commit_id in self.repository.tags.items():
        if commit_id == self.raw_id:
            matching.append(safe_str(name))
    return matching
@LazyProperty
def commit_branches(self):
    """Return names of repository branches whose commit id equals ``raw_id``."""
    return [
        name
        for name, commit_id in self.repository.branches.items()
        if commit_id == self.raw_id
    ]
def _set_branch(self, branches):
    """
    Pick the branch name to expose from ``branches``.

    :param branches: sequence of branch names, possibly empty or ``None``
    :return: first entry passed through ``safe_str``, or ``None`` when
        ``branches`` is falsy
    """
    if not branches:
        return None
    # actually commit can have multiple branches in git
    first_branch = branches[0]
    return safe_str(first_branch)
r4659 | ||||
@LazyProperty
def branch(self):
    """Return the branch chosen by ``_set_branch`` from the remote's branches for this commit."""
    return self._set_branch(self._remote.branch(self.raw_id))
r1 | ||||
r5647 | def _get_path_tree_id_and_type(self, path: bytes): | |||
r5074 | ||||
r5647 | if path in self._path_type_cache: | |||
return self._path_type_cache[path] | ||||
r1 | ||||
r5647 | if path == b"": | |||
self._path_type_cache[b""] = [self._tree_id, NodeKind.DIR] | ||||
return self._path_type_cache[path] | ||||
r1 | ||||
r5647 | tree_id, tree_type, tree_mode = self._remote.tree_and_type_for_path(self.raw_id, path) | |||
r3842 | if tree_id is None: | |||
raise self.no_node_at_path(path) | ||||
r1 | ||||
r5647 | self._path_type_cache[path] = [tree_id, tree_type] | |||
self._path_mode_cache[path] = tree_mode | ||||
r1 | ||||
r5647 | return self._path_type_cache[path] | |||
r1 | ||||
def _get_kind(self, path): | ||||
r5647 | path = self._fix_path(path) | |||
_, path_type = self._get_path_tree_id_and_type(path) | ||||
return path_type | ||||
r1 | ||||
def _assert_is_path(self, path):
    """
    Normalise ``path`` and make sure it points at a file.

    :return: the path as returned by ``_fix_path``
    :raises CommitError: if the path's kind is not ``NodeKind.FILE``
    """
    path = self._fix_path(path)
    if self._get_kind(path) == NodeKind.FILE:
        return path
    raise CommitError(f"File at path={path} does not exist for commit {self.raw_id}")
def _get_file_nodes(self): | ||||
return chain(*(t[2] for t in self.walk())) | ||||
@LazyProperty
def parents(self):
    """
    Returns list of parent commits, built from the parent ids reported
    by the remote.
    """
    return self._make_commits(self._remote.parents(self.id))
@LazyProperty
def children(self):
    """
    Returns list of child commits, built from the child ids reported
    by the remote.
    """
    child_ids = self._remote.children(self.raw_id)
    return self._make_commits(child_ids)
r1 | ||||
r3842 | def _make_commits(self, commit_ids): | |||
def commit_maker(_commit_id): | ||||
r5074 | return self.repository.get_commit(commit_id=_commit_id) | |||
r3842 | ||||
return [commit_maker(commit_id) for commit_id in commit_ids] | ||||
r1 | ||||
def get_file_mode(self, path: bytes):
    """
    Returns stat mode of the file at the given `path`.

    The value is read from ``_path_mode_cache`` after the path has been
    validated as a file and resolved.
    """
    file_path = self._assert_is_path(path)
    # ensure path is traversed
    self._get_path_tree_id_and_type(file_path)
    mode = self._path_mode_cache[file_path]
    return mode
r5074 | ||||
def is_link(self, path: bytes):
    """
    Tell whether the file at ``path`` has the ``FILEMODE_LINK`` mode.

    The mode comes from ``_path_mode_cache``; on a miss it is fetched
    with the remote ``fctx_flags`` call and cached.
    """
    path = self._assert_is_path(path)
    mode_cache = self._path_mode_cache
    if path not in mode_cache:
        mode_cache[path] = self._remote.fctx_flags(self.raw_id, path)
    return mode_cache[path] == FILEMODE_LINK
r1 | ||||
def is_node_binary(self, path):
    """Return the remote ``is_binary`` answer for the object found at ``path``."""
    resolved = self._get_path_tree_id_and_type(path)
    tree_id, _kind = resolved
    return self._remote.is_binary(tree_id)
def node_md5_hash(self, path):
    """Return the remote ``md5_hash`` result for the file at ``path`` in this commit."""
    file_path = self._assert_is_path(path)
    return self._remote.md5_hash(self.raw_id, file_path)
def get_file_content(self, path):
    """
    Returns content of the file at given `path`, as produced by the
    remote ``blob_as_pretty_string`` call.
    """
    resolved = self._get_path_tree_id_and_type(path)
    blob_id, _kind = resolved
    return self._remote.blob_as_pretty_string(blob_id)
r1 | ||||
def get_file_content_streamed(self, path):
    """
    Return the result of the remote ``stream:blob_as_pretty_string``
    method for the object found at ``path``.
    """
    blob_id, _kind = self._get_path_tree_id_and_type(path)
    # the method name contains a colon, so it has to be fetched with getattr
    streamer = getattr(self._remote, "stream:blob_as_pretty_string")
    return streamer(blob_id)
def get_file_size(self, path):
    """
    Returns size of the file at given `path`, as reported by the remote
    ``blob_raw_length`` call.
    """
    resolved = self._get_path_tree_id_and_type(path)
    blob_id, _kind = resolved
    return self._remote.blob_raw_length(blob_id)
r1 | ||||
def get_path_history(self, path, limit=None, pre_load=None):
    """
    Returns history of file as reversed list of `GitCommit` objects for
    which file at given `path` has been modified.

    :param path: file path; validated with ``_assert_is_path``
    :param limit: forwarded to the remote ``node_history`` call
    :param pre_load: forwarded to ``repository.get_commit`` for each entry
    """
    file_path = self._assert_is_path(path)
    commits = []
    for commit_id in self._remote.node_history(self.raw_id, file_path, limit):
        commits.append(self.repository.get_commit(commit_id=commit_id, pre_load=pre_load))
    return commits
r1 | ||||
def get_file_annotate(self, path, pre_load=None):
    """
    Returns a generator of four element tuples with
    lineno, commit_id, commit lazy loader and line

    :param path: path handed to the remote ``node_annotate`` call
    :param pre_load: forwarded to ``repository.get_commit`` by each loader
    """

    result = self._remote.node_annotate(self.raw_id, path)

    for ln_no, commit_id, content in result:
        yield (
            ln_no,
            commit_id,
            # bind the current commit_id as a default argument: a plain
            # closure would read the generator's loop variable when the
            # loader is finally called, i.e. after the generator may have
            # advanced to a different line/commit
            lambda _commit_id=commit_id: self.repository.get_commit(commit_id=_commit_id, pre_load=pre_load),
            content,
        )
r1 | ||||
def get_nodes(self, path: bytes, pre_load=None):
    """
    Return the sorted list of nodes found directly under directory ``path``.

    Every entry reported by the remote also primes ``_path_mode_cache``
    and ``_path_type_cache``; created nodes are kept in ``self.nodes``.

    :param path: directory path inside this commit
    :param pre_load: forwarded to the remote ``get_nodes`` call
    :raises CommitError: if ``path`` is not a directory, or the remote
        reports an entry without a kind
    """

    if self._get_kind(path) != NodeKind.DIR:
        raise CommitError(f"Directory does not exist for commit {self.raw_id} at '{path}'")
    path = self._fix_path(path)

    path_nodes = []
    for obj_name, stat_, tree_item_id, node_kind, pre_load_data in self._remote.get_nodes(self.raw_id, path, pre_load):
        if node_kind is None:
            raise CommitError(f"Requested object type={node_kind} cannot be determined")

        if path == b"":
            obj_path = obj_name
        else:
            obj_path = b"/".join((path, obj_name))

        # cache file mode for git, since we have it already
        if obj_path not in self._path_mode_cache:
            self._path_mode_cache[obj_path] = stat_
        # cache type; the cache is keyed by path, so the guard has to test
        # ``obj_path`` (it used to test ``node_kind``, which is never a key)
        if obj_path not in self._path_type_cache:
            self._path_type_cache[obj_path] = [tree_item_id, node_kind]

        entry = None
        if obj_path in self.nodes:
            entry = self.nodes[obj_path]
        else:
            if node_kind == NodeKind.SUBMODULE:
                url = self._get_submodule_url(obj_path)
                # NOTE(review): built from ``obj_name`` while ``get_node``
                # passes the full path - confirm which one is intended
                entry = SubModuleNode(obj_name, url=url, commit=tree_item_id, alias=self.repository.alias)
            elif node_kind == NodeKind.DIR:
                entry = DirNode(safe_bytes(obj_path), commit=self)
            elif node_kind == NodeKind.FILE:
                entry = FileNode(safe_bytes(obj_path), commit=self, mode=stat_, pre_load_data=pre_load_data)

        if entry:
            self.nodes[obj_path] = entry
            path_nodes.append(entry)

    path_nodes.sort()
    return path_nodes
def get_node(self, path: bytes, pre_load=None):
    """
    Return the node at ``path``, creating and caching it on first use.

    :param path: path inside this commit; normalised with ``_fix_path``
    :param pre_load: forwarded to ``FileNode`` for file paths
    :raises NodeDoesNotExistError: when resolving the path raises
        ``CommitError``
    """
    path = self._fix_path(path)

    # use cached, if we have one
    if path in self.nodes:
        return self.nodes[path]

    try:
        tree_id, path_type = self._get_path_tree_id_and_type(path)
    except CommitError:
        raise NodeDoesNotExistError(f"Cannot find one of parents' directories for a given path: {path}")

    if path == b"":
        node = RootNode(commit=self)
    elif path_type == NodeKind.SUBMODULE:
        url = self._get_submodule_url(path)
        node = SubModuleNode(path, url=url, commit=tree_id, alias=self.repository.alias)
    elif path_type == NodeKind.DIR:
        node = DirNode(safe_bytes(path), commit=self)
    elif path_type == NodeKind.FILE:
        node = FileNode(safe_bytes(path), commit=self, pre_load=pre_load)
        self._path_mode_cache[path] = node.mode
    else:
        raise self.no_node_at_path(path)

    # cache node
    self.nodes[path] = node
    return node
def get_largefile_node(self, path: bytes):
    """
    Return a ``LargeFileNode`` for ``path`` when the remote reports a
    large-file pointer whose object is in the large-files store.

    :return: the node, or ``None`` when there is no pointer or the
        object is not in the store (the latter is logged as a warning)
    """
    tree_id, _kind = self._get_path_tree_id_and_type(path)
    pointer_spec = self._remote.is_large_file(tree_id)
    if not pointer_spec:
        return None

    # content of that file regular FileNode is the hash of largefile
    file_id = pointer_spec.get("oid_hash")
    if not self._remote.in_largefiles_store(file_id):
        log.warning(f'Largefile oid={file_id} not found in store')
        return None

    store_path = self._remote.store_path(file_id)
    return LargeFileNode(safe_bytes(store_path), commit=self, org_path=path)
r1577 | ||||
@LazyProperty
def affected_files(self) -> list[bytes]:
    """
    Gets a fast accessible file changes for given commit: the union of
    added, modified and deleted paths from ``_changes_cache``.
    """
    added, modified, deleted = self._changes_cache
    touched = added.union(modified)
    touched = touched.union(deleted)
    return list(touched)
@LazyProperty
def _changes_cache(self) -> tuple[set, set, set]:
    """
    Collect added, modified and deleted paths of this commit.

    The remote ``tree_changes`` call is made once per parent and the
    results are merged; a commit without parents is compared against
    ``None`` instead of a parent id.

    :return: ``(added, modified, deleted)`` sets
    """
    added, modified, deleted = set(), set(), set()

    # a commit without parents gets a single EmptyCommit placeholder
    parents = self.parents or [base.EmptyCommit()]

    for parent in parents:
        oid = None if isinstance(parent, base.EmptyCommit) else parent.raw_id
        changes = self._remote.tree_changes(oid, self.raw_id)
        _added, _modified, _deleted = changes
        added = added | set(_added)
        modified = modified | set(_modified)
        deleted = deleted | set(_deleted)

    return added, modified, deleted
def _get_paths_for_status(self, status): | ||||
""" | ||||
Returns sorted list of paths for given ``status``. | ||||
:param status: one of: *added*, *modified* or *deleted* | ||||
""" | ||||
added, modified, deleted = self._changes_cache | ||||
r5647 | return sorted({"added": list(added), "modified": list(modified), "deleted": list(deleted)}[status]) | |||
r4242 | ||||
@LazyProperty
def added_paths(self):
    """Return a sorted list of paths added in this commit."""
    return list(self._get_paths_for_status("added"))
r4242 | ||||
@LazyProperty
def changed_paths(self):
    """Return a sorted list of paths modified in this commit."""
    return list(self._get_paths_for_status("modified"))
r4242 | ||||
@LazyProperty
def removed_paths(self):
    """Return a sorted list of paths deleted in this commit."""
    return list(self._get_paths_for_status("deleted"))
r1 | ||||
r5647 | def _get_submodule_url(self, submodule_path: bytes): | |||
git_modules_path = b".gitmodules" | ||||
r1 | ||||
if self._submodules is None: | ||||
self._submodules = {} | ||||
try: | ||||
submodules_node = self.get_node(git_modules_path) | ||||
except NodeDoesNotExistError: | ||||
return None | ||||
r4239 | parser = configparser.RawConfigParser() | |||
r5126 | parser.read_file(io.StringIO(submodules_node.str_content)) | |||
r1 | ||||
for section in parser.sections(): | ||||
r5647 | path = parser.get(section, "path") | |||
url = parser.get(section, "url") | ||||
r1 | if path and url: | |||
r5647 | self._submodules[safe_bytes(path).strip(b"/")] = url | |||
r1 | ||||
r5647 | return self._submodules.get(submodule_path.strip(b"/")) | |||