##// END OF EJS Templates
docs: updated performance/tunning docs according to latest code changes
docs: updated performance/tunning docs according to latest code changes

File last commit:

r4115:4f985c11 default
r4172:0f4eef6e default
Show More
commit.py
484 lines | 15.4 KiB | text/x-python | PythonLexer
project: added all source files and assets
r1 # -*- coding: utf-8 -*-
docs: updated copyrights to 2019
r3363 # Copyright (C) 2014-2019 RhodeCode GmbH
project: added all source files and assets
r1 #
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
"""
GIT commit module
"""
import re
import stat
from itertools import chain
from StringIO import StringIO
from zope.cachedescriptors.property import Lazy as LazyProperty
dan
vcs: make commit datetimes utc
r154 from rhodecode.lib.datelib import utcdate_fromtimestamp
project: added all source files and assets
r1 from rhodecode.lib.utils import safe_unicode, safe_str
from rhodecode.lib.utils2 import safe_int
from rhodecode.lib.vcs.conf import settings
from rhodecode.lib.vcs.backends import base
from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError
from rhodecode.lib.vcs.nodes import (
FileNode, DirNode, NodeKind, RootNode, SubModuleNode,
ChangedFileNodesGenerator, AddedFileNodesGenerator,
largefiles: enabled download of largefiles for git and mercurial from web interface....
r1577 RemovedFileNodesGenerator, LargeFileNode)
code: code fixes and small adjustments
r2623 from rhodecode.lib.vcs.compat import configparser
project: added all source files and assets
r1
class GitCommit(base.BaseCommit):
    """
    Represents state of the repository at single commit id.

    All data is fetched through ``repository._remote``. Per-path lookups are
    memoized on the instance in ``_paths`` (object id and type),
    ``_stat_modes`` (stat mode) and ``nodes`` (node objects).
    """

    _filter_pre_load = [
        # done through a more complex tree walk on parents
        "affected_files",
        # done through subprocess not remote call
        "children",
        # done through a more complex tree walk on parents
        "status",
        # mercurial specific property not supported here
        "_file_paths",
        # mercurial specific property not supported here
        'obsolete',
        # mercurial specific property not supported here
        'phase',
        # mercurial specific property not supported here
        'hidden'
    ]

    # object types which denote a submodule entry; kept in one place so that
    # `_get_kind`, `get_nodes` and `get_node` agree with each other
    _submodule_types = ('link', 'commit')

    def __init__(self, repository, raw_id, idx, pre_load=None):
        """
        :param repository: repository object; must expose ``_remote``
        :param raw_id: commit id, stored as ``str`` via ``safe_str``
        :param idx: index of the commit, stored as given
        :param pre_load: optional list of attribute names to fetch in one
            bulk request, see `_set_bulk_properties`
        """
        self.repository = repository
        self._remote = repository._remote
        # TODO: johbo: Tweak of raw_id should not be necessary
        self.raw_id = safe_str(raw_id)
        self.idx = idx

        self._set_bulk_properties(pre_load)

        # caches
        self._stat_modes = {}  # stat info for paths
        self._paths = {}  # path processed with parse_tree
        self.nodes = {}
        self._submodules = None

    def _set_bulk_properties(self, pre_load):
        """
        Fetches the attributes named in `pre_load` with a single
        ``bulk_request`` and stores them directly in ``__dict__``.

        Names listed in `_filter_pre_load` are skipped. Nothing is requested
        when `pre_load` is empty or fully filtered out.
        """
        if not pre_load:
            return

        wanted = [
            entry for entry in pre_load
            if entry not in self._filter_pre_load]
        if not wanted:
            return

        result = self._remote.bulk_request(self.raw_id, wanted)
        for attr, value in result.items():
            if attr in ["author", "message"]:
                if value:
                    value = safe_unicode(value)
            elif attr == "date":
                value = utcdate_fromtimestamp(*value)
            elif attr == "parents":
                value = self._make_commits(value)
            elif attr == "branch":
                value = value[0] if value else None
            # written straight into __dict__, presumably so the lazy
            # property of the same name is never computed
            self.__dict__[attr] = value

    @LazyProperty
    def _commit(self):
        return self._remote[self.raw_id]

    @LazyProperty
    def _tree_id(self):
        return self._remote[self._commit['tree']]['id']

    @LazyProperty
    def id(self):
        return self.raw_id

    @LazyProperty
    def short_id(self):
        return self.raw_id[:12]

    @LazyProperty
    def message(self):
        return safe_unicode(self._remote.message(self.id))

    @LazyProperty
    def committer(self):
        # NOTE(review): this calls ``_remote.author`` exactly like `author`
        # does; confirm whether the remote exposes a dedicated committer call
        return safe_unicode(self._remote.author(self.id))

    @LazyProperty
    def author(self):
        return safe_unicode(self._remote.author(self.id))

    @LazyProperty
    def date(self):
        unix_ts, tz = self._remote.date(self.raw_id)
        return utcdate_fromtimestamp(unix_ts, tz)

    @LazyProperty
    def status(self):
        """
        Returns tuple of modified, added and removed files for current commit
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def tags(self):
        """
        Returns names of the repository tags which point at this commit.
        """
        return [
            safe_unicode(name)
            for name, commit_id in self.repository.tags.iteritems()
            if commit_id == self.raw_id]

    @LazyProperty
    def commit_branches(self):
        """
        Returns names of the repository branches which point at this commit.
        """
        return [
            name
            for name, commit_id in self.repository.branches.iteritems()
            if commit_id == self.raw_id]

    @LazyProperty
    def branch(self):
        """
        Returns the first branch reported by the remote for this commit, or
        `None` when the remote reports no branch.
        """
        branches = self._remote.branch(self.raw_id)
        if branches:
            # actually commit can have multiple branches in git
            return safe_unicode(branches[0])
        return None

    def _get_tree_id_for_path(self, path):
        """
        Returns ``[object_id, object_type]`` for the given `path`.

        Leading and trailing slashes are ignored; the empty path resolves to
        the root tree of the commit. Results are cached in ``_paths`` and the
        stat mode reported by the remote is cached in ``_stat_modes``.

        :raises: whatever ``no_node_at_path`` builds, when the remote reports
            no object for `path`
        """
        # normalize first, so that the cache is keyed (and probed) by the
        # same form of the path
        path = safe_str(path).strip('/')
        if path in self._paths:
            return self._paths[path]

        if path == '':
            data = [self._tree_id, "tree"]
        else:
            tree_id, tree_type, tree_mode = \
                self._remote.tree_and_type_for_path(self.raw_id, path)
            if tree_id is None:
                raise self.no_node_at_path(path)
            data = [tree_id, tree_type]
            self._stat_modes[path] = tree_mode

        self._paths[path] = data
        return data

    def _get_kind(self, path):
        """
        Returns `NodeKind` of the object at `path`, `None` for unknown types.
        """
        _tree_id, type_ = self._get_tree_id_for_path(path)
        if type_ == 'blob':
            return NodeKind.FILE
        if type_ == 'tree':
            return NodeKind.DIR
        if type_ in self._submodule_types:
            return NodeKind.SUBMODULE
        return None

    def _get_filectx(self, path):
        """
        Returns `path` passed through ``_fix_path``.

        :raises CommitError: when `path` does not point at a file
        """
        path = self._fix_path(path)
        if self._get_kind(path) != NodeKind.FILE:
            raise CommitError(
                "File does not exist for commit %s at '%s'" % (self.raw_id, path))
        return path

    def _get_file_nodes(self):
        # third element of every ``walk()`` item holds the file nodes
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parent commits.
        """
        parent_ids = self._remote.parents(self.id)
        return self._make_commits(parent_ids)

    @LazyProperty
    def children(self):
        """
        Returns list of child commits.
        """
        children = self._remote.children(self.raw_id)
        return self._make_commits(children)

    def _make_commits(self, commit_ids):
        """
        Returns list of commits loaded from the repository, one for each id
        in `commit_ids`, in the same order.
        """
        get_commit = self.repository.get_commit
        return [get_commit(commit_id=commit_id) for commit_id in commit_ids]

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given `path`.
        """
        path = safe_str(path)
        # ensure path is traversed
        self._get_tree_id_for_path(path)
        # modes are cached under the slash-stripped form of the path
        return self._stat_modes[path.strip('/')]

    def is_link(self, path):
        return stat.S_ISLNK(self.get_file_mode(path))

    def is_node_binary(self, path):
        tree_id, _ = self._get_tree_id_for_path(path)
        return self._remote.is_binary(tree_id)

    def get_file_content(self, path):
        """
        Returns content of the file at given `path`.
        """
        tree_id, _ = self._get_tree_id_for_path(path)
        return self._remote.blob_as_pretty_string(tree_id)

    def get_file_content_streamed(self, path):
        tree_id, _ = self._get_tree_id_for_path(path)
        # the attribute name contains a colon, so it is only reachable
        # through getattr
        stream_method = getattr(self._remote, 'stream:blob_as_pretty_string')
        return stream_method(tree_id)

    def get_file_size(self, path):
        """
        Returns size of the file at given `path`.
        """
        tree_id, _ = self._get_tree_id_for_path(path)
        return self._remote.blob_raw_length(tree_id)

    def get_path_history(self, path, limit=None, pre_load=None):
        """
        Returns history of file as reversed list of `GitCommit` objects for
        which file at given `path` has been modified.
        """
        path = self._get_filectx(path)
        hist = self._remote.node_history(self.raw_id, path, limit)
        return [
            self.repository.get_commit(commit_id=commit_id, pre_load=pre_load)
            for commit_id in hist]

    def get_file_annotate(self, path, pre_load=None):
        """
        Returns a generator of four element tuples with
        lineno, commit_id, commit lazy loader and line
        """

        def commit_loader(_commit_id):
            # bind the id per line; a lambda closing over the loop variable
            # would load the commit of whichever line was yielded last
            return lambda: self.repository.get_commit(
                commit_id=_commit_id, pre_load=pre_load)

        result = self._remote.node_annotate(self.raw_id, path)

        for ln_no, commit_id, content in result:
            yield (
                ln_no, commit_id,
                commit_loader(commit_id),
                content)

    def get_nodes(self, path):
        """
        Returns sorted list of nodes found directly under the directory at
        `path`. Created nodes are also stored in the ``nodes`` cache.

        :raises CommitError: when `path` is not a directory, or the remote
            reports an entry of an unexpected type
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise CommitError(
                "Directory does not exist for commit %s at '%s'" % (self.raw_id, path))
        path = self._fix_path(path)

        tree_id, _ = self._get_tree_id_for_path(path)
        dirnodes = []
        filenodes = []

        # extracted tree ID gives us our files...
        for name, stat_, id_, type_ in self._remote.tree_items(tree_id):
            if path != '':
                obj_path = '/'.join((path, name))
            else:
                obj_path = name

            if type_ in self._submodule_types:
                url = self._get_submodule_url(obj_path)
                dirnodes.append(SubModuleNode(
                    name, url=url, commit=id_, alias=self.repository.alias))
                continue

            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat_

            if type_ == 'tree':
                dirnodes.append(DirNode(obj_path, commit=self))
            elif type_ == 'blob':
                filenodes.append(FileNode(obj_path, commit=self, mode=stat_))
            else:
                raise CommitError(
                    "Requested object should be Tree or Blob, is %s" % (type_,))

        nodes = dirnodes + filenodes
        for node in nodes:
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path, pre_load=None):
        """
        Returns node at the given `path`; nodes are cached in ``nodes``.

        :param pre_load: passed to `FileNode` when `path` points at a file
        :raises NodeDoesNotExistError: when `path` cannot be resolved
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)

        if path in self.nodes:
            return self.nodes[path]

        try:
            tree_id, type_ = self._get_tree_id_for_path(path)
        except CommitError:
            raise NodeDoesNotExistError(
                "Cannot find one of parents' directories for a given "
                "path: %s" % path)

        if type_ in self._submodule_types:
            url = self._get_submodule_url(path)
            node = SubModuleNode(path, url=url, commit=tree_id,
                                 alias=self.repository.alias)
        elif type_ == 'tree':
            if path == '':
                node = RootNode(commit=self)
            else:
                node = DirNode(path, commit=self)
        elif type_ == 'blob':
            node = FileNode(path, commit=self, pre_load=pre_load)
            self._stat_modes[path] = node.mode
        else:
            raise self.no_node_at_path(path)

        # cache node
        self.nodes[path] = node
        return node

    def get_largefile_node(self, path):
        """
        Returns `LargeFileNode` for `path` when the remote reports the file
        as a large file pointer and the object is in the largefiles store,
        otherwise `None`.
        """
        tree_id, _ = self._get_tree_id_for_path(path)
        pointer_spec = self._remote.is_large_file(tree_id)

        if pointer_spec:
            # content of that file regular FileNode is the hash of largefile
            file_id = pointer_spec.get('oid_hash')
            if self._remote.in_largefiles_store(file_id):
                lf_path = self._remote.store_path(file_id)
                return LargeFileNode(lf_path, commit=self, org_path=path)
        return None

    @LazyProperty
    def affected_files(self):
        """
        Gets a fast accessible file changes for given commit
        """
        added, modified, deleted = self._changes_cache
        return list(added.union(modified).union(deleted))

    @LazyProperty
    def _changes_cache(self):
        """
        Returns tuple of three sets of paths: added, modified, deleted.

        Changes are collected against every parent of the commit; a commit
        without parents is compared against an `EmptyCommit`.
        """
        added = set()
        modified = set()
        deleted = set()

        parents = self.parents
        if not parents:
            parents = [base.EmptyCommit()]

        for parent in parents:
            if isinstance(parent, base.EmptyCommit):
                oid = None
            else:
                oid = parent.raw_id
            changes = self._remote.tree_changes(oid, self.raw_id)
            for (oldpath, newpath), (_, _), (_, _) in changes:
                if newpath and oldpath:
                    modified.add(newpath)
                elif newpath and not oldpath:
                    added.add(newpath)
                elif not newpath and oldpath:
                    deleted.add(oldpath)
        return added, modified, deleted

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        added, modified, deleted = self._changes_cache
        paths_by_status = {
            'added': added,
            'modified': modified,
            'deleted': deleted,
        }
        return sorted(paths_by_status[status])

    @LazyProperty
    def added(self):
        """
        Returns added ``FileNode`` objects: a plain list of all file nodes
        when the commit has no parents, ``AddedFileNodesGenerator`` otherwise.
        """
        if not self.parents:
            return list(self._get_file_nodes())
        return AddedFileNodesGenerator(
            self._get_paths_for_status('added'), self)

    @LazyProperty
    def changed(self):
        """
        Returns modified ``FileNode`` objects: an empty list when the commit
        has no parents, ``ChangedFileNodesGenerator`` otherwise.
        """
        if not self.parents:
            return []
        return ChangedFileNodesGenerator(
            self._get_paths_for_status('modified'), self)

    @LazyProperty
    def removed(self):
        """
        Returns removed ``FileNode`` objects: an empty list when the commit
        has no parents, ``RemovedFileNodesGenerator`` otherwise.
        """
        if not self.parents:
            return []
        return RemovedFileNodesGenerator(
            self._get_paths_for_status('deleted'), self)

    def _get_submodule_url(self, submodule_path):
        """
        Returns url configured in ``.gitmodules`` for `submodule_path`, or
        `None` when the file or a matching entry does not exist.

        The file is parsed on first use and the path to url mapping is kept
        in ``_submodules``.
        """
        git_modules_path = '.gitmodules'

        if self._submodules is None:
            self._submodules = {}

            try:
                submodules_node = self.get_node(git_modules_path)
            except NodeDoesNotExistError:
                return None

            # ConfigParser fails if there are whitespaces, also it needs an iterable
            # file like content
            def iter_content(_content):
                for line in _content.splitlines():
                    yield line

            parser = configparser.ConfigParser()
            parser.read_file(iter_content(submodules_node.content))

            for section in parser.sections():
                path = parser.get(section, 'path')
                url = parser.get(section, 'url')
                if path and url:
                    self._submodules[path.strip('/')] = url

        return self._submodules.get(submodule_path.strip('/'))