##// END OF EJS Templates
docs: updated docs for importing repos and fixed howto on building them
docs: updated docs for importing repos and fixed howto on building them

File last commit:

r5651:bad147da default
r5654:f7519e5d default
Show More
commit.py
450 lines | 14.8 KiB | text/x-python | PythonLexer
core: updated copyright to 2024
r5608 # Copyright (C) 2014-2024 RhodeCode GmbH
project: added all source files and assets
r1 #
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
"""
GIT commit module
"""
git: fix submodule handling for git repositories
r5126 import io
python3: fix import configparser
r4927 import configparser
fix(encoding for file): fixed support of non utf-8 files in all backends
r5647 import logging
project: added all source files and assets
r1 from itertools import chain
from zope.cachedescriptors.property import Lazy as LazyProperty
dan
vcs: make commit datetimes utc
r154 from rhodecode.lib.datelib import utcdate_fromtimestamp
vcs-lib: bulk of changes for python3 support
r5074 from rhodecode.lib.str_utils import safe_bytes, safe_str
project: added all source files and assets
r1 from rhodecode.lib.vcs.backends import base
from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError
from rhodecode.lib.vcs.nodes import (
fix(encoding for file): fixed support of non utf-8 files in all backends
r5647 FileNode,
DirNode,
NodeKind,
RootNode,
SubModuleNode,
LargeFileNode,
)
from rhodecode.lib.vcs_common import FILEMODE_LINK
log = logging.getLogger(__name__)
project: added all source files and assets
r1
class GitCommit(base.BaseCommit):
    """
    Represents state of the repository at single commit id.

    All data is fetched through ``repository._remote``. Values are memoised
    either as ``LazyProperty`` attributes or in the per-instance caches
    created in ``__init__`` (``nodes``, ``_path_mode_cache``,
    ``_path_type_cache``, ``_submodules``).
    """

    # attribute names that are dropped from a ``pre_load`` request before it
    # is sent to the remote in ``_set_bulk_properties``
    _filter_pre_load = [
        # done through a more complex tree walk on parents
        "affected_files",
        # done through subprocess not remote call
        "children",
        # done through a more complex tree walk on parents
        "status",
        # mercurial specific property not supported here
        "obsolete",
        # mercurial specific property not supported here
        "phase",
        # mercurial specific property not supported here
        "hidden",
    ]

    def __init__(self, repository, raw_id, idx, pre_load=None):
        """
        :param repository: repository object; must expose ``_remote``
        :param raw_id: commit id, converted with ``safe_str``
        :param idx: index of this commit, stored as ``self.idx``
        :param pre_load: optional list of attribute names to fetch in one
            bulk request
        """
        self.repository = repository
        self._remote = repository._remote
        # TODO: johbo: Tweak of raw_id should not be necessary
        self.raw_id = safe_str(raw_id)
        self.idx = idx

        self._set_bulk_properties(pre_load)

        # caches
        self.nodes = {}
        self._path_mode_cache = {}  # path stats cache, e.g filemode etc
        self._path_type_cache = {}  # path type dir/file/link etc cache

        self._submodules = None

    def _set_bulk_properties(self, pre_load):
        """
        Fetch the attributes named in `pre_load` with a single remote call
        and store them directly in the instance ``__dict__``.

        Names listed in ``_filter_pre_load`` are removed first; nothing is
        requested when the remaining list is empty.
        """
        if not pre_load:
            return
        pre_load = [entry for entry in pre_load if entry not in self._filter_pre_load]
        if not pre_load:
            return

        result = self._remote.bulk_request(self.raw_id, pre_load)
        for attr, value in result.items():
            if attr in ["author", "message"]:
                if value:
                    value = safe_str(value)
            elif attr == "date":
                value = utcdate_fromtimestamp(*value)
            elif attr == "parents":
                value = self._make_commits(value)
            elif attr == "branch":
                value = self._set_branch(value)
            # writing into __dict__ pre-fills the attribute of the same name
            self.__dict__[attr] = value

    @LazyProperty
    def _commit(self):
        """Remote object looked up by ``raw_id``."""
        return self._remote[self.raw_id]

    @LazyProperty
    def _tree_id(self):
        """Id of the tree object referenced by this commit."""
        return self._remote[self._commit["tree"]]["id"]

    @LazyProperty
    def id(self):
        """Same value as ``raw_id``."""
        return self.raw_id

    @LazyProperty
    def short_id(self):
        """First 12 characters of ``raw_id``."""
        return self.raw_id[:12]

    @LazyProperty
    def message(self):
        """Commit message as returned by the remote, passed through ``safe_str``."""
        return safe_str(self._remote.message(self.id))

    @LazyProperty
    def committer(self):
        """
        Committer of this commit.

        NOTE(review): this reads the remote ``author`` value, exactly like
        the ``author`` property - confirm whether that is intended.
        """
        return safe_str(self._remote.author(self.id))

    @LazyProperty
    def author(self):
        """Author as returned by the remote, passed through ``safe_str``."""
        return safe_str(self._remote.author(self.id))

    @LazyProperty
    def date(self):
        """
        Commit date built by ``utcdate_fromtimestamp`` from the
        ``(timestamp, tz)`` pair returned by the remote.
        """
        unix_ts, tz = self._remote.date(self.raw_id)
        return utcdate_fromtimestamp(unix_ts, tz)

    @LazyProperty
    def status(self):
        """
        Returns modified, added, deleted files for current commit
        """
        added, modified, deleted = self._changes_cache
        # the second element used to repeat `modified`, which silently
        # dropped every added file from the result
        return list(modified), list(added), list(deleted)

    @LazyProperty
    def tags(self):
        """Names of all repository tags whose commit id equals ``raw_id``."""
        tags = [safe_str(name) for name, commit_id in self.repository.tags.items() if commit_id == self.raw_id]
        return tags

    @LazyProperty
    def commit_branches(self):
        """Names of all repository branches whose commit id equals ``raw_id``."""
        branches = []
        for name, commit_id in self.repository.branches.items():
            if commit_id == self.raw_id:
                branches.append(name)
        return branches

    def _set_branch(self, branches):
        """
        Return the first entry of `branches` as a string.

        Returns ``None`` (implicitly) when `branches` is empty.
        """
        if branches:
            # actually commit can have multiple branches in git
            return safe_str(branches[0])

    @LazyProperty
    def branch(self):
        """First branch reported by the remote for this commit, or ``None``."""
        branches = self._remote.branch(self.raw_id)
        return self._set_branch(branches)

    def _get_path_tree_id_and_type(self, path: bytes):
        """
        Return the cached ``[tree_id, node_kind]`` pair for `path`, asking
        the remote on a cache miss.

        The empty path maps to this commit's root tree with kind
        ``NodeKind.DIR``. A remote lookup also records the file mode of the
        path in ``_path_mode_cache``.

        :raises: the error produced by ``self.no_node_at_path(path)`` when
            the remote reports no object at `path`
        """
        if path in self._path_type_cache:
            return self._path_type_cache[path]

        if path == b"":
            self._path_type_cache[b""] = [self._tree_id, NodeKind.DIR]
            return self._path_type_cache[path]

        tree_id, tree_type, tree_mode = self._remote.tree_and_type_for_path(self.raw_id, path)
        if tree_id is None:
            raise self.no_node_at_path(path)

        self._path_type_cache[path] = [tree_id, tree_type]
        self._path_mode_cache[path] = tree_mode

        return self._path_type_cache[path]

    def _get_kind(self, path):
        """Return the node kind of `path` after passing it through ``_fix_path``."""
        path = self._fix_path(path)
        _, path_type = self._get_path_tree_id_and_type(path)
        return path_type

    def _assert_is_path(self, path):
        """
        Return the fixed `path` if it points to a file.

        :raises CommitError: when the node kind is not ``NodeKind.FILE``
        """
        path = self._fix_path(path)
        if self._get_kind(path) != NodeKind.FILE:
            raise CommitError(f"File at path={path} does not exist for commit {self.raw_id}")
        return path

    def _get_file_nodes(self):
        """Chain together the third element of every tuple yielded by ``self.walk()``."""
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parent commits.
        """
        parent_ids = self._remote.parents(self.id)
        return self._make_commits(parent_ids)

    @LazyProperty
    def children(self):
        """
        Returns list of child commits.
        """
        children = self._remote.children(self.raw_id)
        return self._make_commits(children)

    def _make_commits(self, commit_ids):
        """Resolve every id in `commit_ids` through ``repository.get_commit``."""
        def commit_maker(_commit_id):
            return self.repository.get_commit(commit_id=_commit_id)

        return [commit_maker(commit_id) for commit_id in commit_ids]

    def get_file_mode(self, path: bytes):
        """
        Returns stat mode of the file at the given `path`.

        :raises CommitError: when `path` is not a file in this commit
        """
        path = self._assert_is_path(path)

        # ensure path is traversed
        self._get_path_tree_id_and_type(path)

        return self._path_mode_cache[path]

    def is_link(self, path: bytes):
        """
        Return ``True`` when the cached mode of the file at `path` equals
        ``FILEMODE_LINK``; the mode is fetched from the remote if missing.

        :raises CommitError: when `path` is not a file in this commit
        """
        path = self._assert_is_path(path)
        if path not in self._path_mode_cache:
            self._path_mode_cache[path] = self._remote.fctx_flags(self.raw_id, path)

        return self._path_mode_cache[path] == FILEMODE_LINK

    def is_node_binary(self, path):
        """Return the remote's ``is_binary`` verdict for the object at `path`."""
        tree_id, _ = self._get_path_tree_id_and_type(path)
        return self._remote.is_binary(tree_id)

    def node_md5_hash(self, path):
        """
        Return the remote's ``md5_hash`` result for the file at `path`.

        :raises CommitError: when `path` is not a file in this commit
        """
        path = self._assert_is_path(path)
        return self._remote.md5_hash(self.raw_id, path)

    def get_file_content(self, path):
        """
        Returns content of the file at given `path`.
        """
        tree_id, _ = self._get_path_tree_id_and_type(path)
        return self._remote.blob_as_pretty_string(tree_id)

    def get_file_content_streamed(self, path):
        """
        Same lookup as ``get_file_content`` but goes through the remote
        attribute named ``stream:blob_as_pretty_string``.
        """
        tree_id, _ = self._get_path_tree_id_and_type(path)
        # the attribute name contains a colon, so it can only be reached
        # through getattr
        stream_method = getattr(self._remote, "stream:blob_as_pretty_string")
        return stream_method(tree_id)

    def get_file_size(self, path):
        """
        Returns size of the file at given `path`.
        """
        tree_id, _ = self._get_path_tree_id_and_type(path)
        return self._remote.blob_raw_length(tree_id)

    def get_path_history(self, path, limit=None, pre_load=None):
        """
        Returns history of file as reversed list of `GitCommit` objects for
        which file at given `path` has been modified.

        :param limit: forwarded to the remote ``node_history`` call
        :param pre_load: forwarded to ``repository.get_commit``
        :raises CommitError: when `path` is not a file in this commit
        """
        path = self._assert_is_path(path)
        history = self._remote.node_history(self.raw_id, path, limit)
        return [self.repository.get_commit(commit_id=commit_id, pre_load=pre_load) for commit_id in history]

    def get_file_annotate(self, path, pre_load=None):
        """
        Returns a generator of four element tuples with
        lineno, commit_id, commit lazy loader and line

        The lazy loader is a zero-argument callable that resolves the
        commit of *its own* line, regardless of how far the generator has
        advanced by the time it is called.
        """

        result = self._remote.node_annotate(self.raw_id, path)

        for ln_no, commit_id, content in result:
            yield (
                ln_no,
                commit_id,
                # bind commit_id as a default argument: a plain closure would
                # see whatever value the loop variable holds when the loader
                # is finally called (late binding)
                lambda commit_id=commit_id: self.repository.get_commit(commit_id=commit_id, pre_load=pre_load),
                content,
            )

    def get_nodes(self, path: bytes, pre_load=None):
        """
        Return the sorted list of nodes found directly under the directory
        at `path`.

        Modes and kinds reported by the remote are stored in the path
        caches, and created nodes are stored in ``self.nodes``. Entries
        whose kind is none of SUBMODULE, DIR or FILE are skipped.

        :param pre_load: forwarded to the remote ``get_nodes`` call
        :raises CommitError: when `path` is not a directory, or the remote
            returns an entry without a node kind
        """

        if self._get_kind(path) != NodeKind.DIR:
            raise CommitError(f"Directory does not exist for commit {self.raw_id} at '{path}'")
        path = self._fix_path(path)

        path_nodes = []

        for obj_name, stat_, tree_item_id, node_kind, pre_load_data in self._remote.get_nodes(self.raw_id, path, pre_load):
            if node_kind is None:
                raise CommitError(f"Requested object type={node_kind} cannot be determined")

            if path == b"":
                obj_path = obj_name
            else:
                obj_path = b"/".join((path, obj_name))

            # cache file mode for git, since we have it already
            if obj_path not in self._path_mode_cache:
                self._path_mode_cache[obj_path] = stat_

            # cache type; the cache is keyed by path, so the membership test
            # has to use obj_path (it used to test node_kind by mistake)
            if obj_path not in self._path_type_cache:
                self._path_type_cache[obj_path] = [tree_item_id, node_kind]

            entry = None
            if obj_path in self.nodes:
                entry = self.nodes[obj_path]
            else:
                if node_kind == NodeKind.SUBMODULE:
                    url = self._get_submodule_url(obj_path)
                    entry = SubModuleNode(obj_name, url=url, commit=tree_item_id, alias=self.repository.alias)
                elif node_kind == NodeKind.DIR:
                    entry = DirNode(safe_bytes(obj_path), commit=self)
                elif node_kind == NodeKind.FILE:
                    entry = FileNode(safe_bytes(obj_path), commit=self, mode=stat_, pre_load_data=pre_load_data)

            if entry:
                self.nodes[obj_path] = entry
                path_nodes.append(entry)

        path_nodes.sort()
        return path_nodes

    def get_node(self, path: bytes, pre_load=None):
        """
        Return the node at `path`, creating and caching it in ``self.nodes``
        on first access.

        The empty path yields a ``RootNode``; other paths yield a
        ``SubModuleNode``, ``DirNode`` or ``FileNode`` depending on the
        node kind.

        :param pre_load: forwarded to ``FileNode``
        :raises NodeDoesNotExistError: when the path lookup fails with a
            ``CommitError``
        :raises: the error produced by ``self.no_node_at_path(path)`` when
            the node kind is not one of the handled kinds
        """
        path = self._fix_path(path)

        # use cached, if we have one
        if path in self.nodes:
            return self.nodes[path]

        try:
            tree_id, path_type = self._get_path_tree_id_and_type(path)
        except CommitError:
            raise NodeDoesNotExistError(f"Cannot find one of parents' directories for a given path: {path}")

        if path == b"":
            node = RootNode(commit=self)
        else:
            if path_type == NodeKind.SUBMODULE:
                url = self._get_submodule_url(path)
                node = SubModuleNode(path, url=url, commit=tree_id, alias=self.repository.alias)
            elif path_type == NodeKind.DIR:
                node = DirNode(safe_bytes(path), commit=self)
            elif path_type == NodeKind.FILE:
                node = FileNode(safe_bytes(path), commit=self, pre_load=pre_load)
                self._path_mode_cache[path] = node.mode
            else:
                raise self.no_node_at_path(path)

        # cache node
        self.nodes[path] = node
        return self.nodes[path]

    def get_largefile_node(self, path: bytes):
        """
        Return a ``LargeFileNode`` for `path` when the remote reports the
        object as a large-file pointer and the referenced file is in the
        large-files store.

        Returns ``None`` when the object is not a pointer, and logs a
        warning and returns ``None`` when the file is not in the store.
        """
        tree_id, _ = self._get_path_tree_id_and_type(path)
        pointer_spec = self._remote.is_large_file(tree_id)

        if pointer_spec:
            # content of that file regular FileNode is the hash of largefile
            file_id = pointer_spec.get("oid_hash")
            if not self._remote.in_largefiles_store(file_id):
                log.warning(f'Largefile oid={file_id} not found in store')
                return None

            lf_path = self._remote.store_path(file_id)
            return LargeFileNode(safe_bytes(lf_path), commit=self, org_path=path)

    @LazyProperty
    def affected_files(self) -> list[bytes]:
        """
        Gets a fast accessible file changes for given commit
        """
        added, modified, deleted = self._changes_cache
        return list(added.union(modified).union(deleted))

    @LazyProperty
    def _changes_cache(self) -> tuple[set, set, set]:
        """
        Return ``(added, modified, deleted)`` path sets, merged over the
        remote ``tree_changes`` result for every parent.

        A commit without parents is compared against ``None`` (via a
        ``base.EmptyCommit`` placeholder).
        """
        added = set()
        modified = set()
        deleted = set()

        parents = self.parents
        if not self.parents:
            parents = [base.EmptyCommit()]
        for parent in parents:
            if isinstance(parent, base.EmptyCommit):
                oid = None
            else:
                oid = parent.raw_id
            _added, _modified, _deleted = self._remote.tree_changes(oid, self.raw_id)
            added = added | set(_added)
            modified = modified | set(_modified)
            deleted = deleted | set(_deleted)

        return added, modified, deleted

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        added, modified, deleted = self._changes_cache
        return sorted({"added": list(added), "modified": list(modified), "deleted": list(deleted)}[status])

    @LazyProperty
    def added_paths(self):
        """Sorted list of paths added by this commit."""
        return [n for n in self._get_paths_for_status("added")]

    @LazyProperty
    def changed_paths(self):
        """Sorted list of paths modified by this commit."""
        return [n for n in self._get_paths_for_status("modified")]

    @LazyProperty
    def removed_paths(self):
        """Sorted list of paths deleted by this commit."""
        return [n for n in self._get_paths_for_status("deleted")]

    def _get_submodule_url(self, submodule_path: bytes):
        """
        Return the url configured for `submodule_path` in ``.gitmodules``,
        or ``None`` when the file or a matching entry does not exist.

        The file is parsed once per instance; the result is kept in
        ``self._submodules`` keyed by the path with surrounding slashes
        stripped.
        """
        git_modules_path = b".gitmodules"

        if self._submodules is None:
            self._submodules = {}

            try:
                submodules_node = self.get_node(git_modules_path)
            except NodeDoesNotExistError:
                return None

            parser = configparser.RawConfigParser()
            parser.read_file(io.StringIO(submodules_node.str_content))

            for section in parser.sections():
                # fallback=None lets the guard below skip sections that lack
                # one of the options instead of raising NoOptionError
                path = parser.get(section, "path", fallback=None)
                url = parser.get(section, "url", fallback=None)
                if path and url:
                    self._submodules[safe_bytes(path).strip(b"/")] = url

        return self._submodules.get(submodule_path.strip(b"/"))