# -*- coding: utf-8 -*- # Copyright (C) 2014-2018 RhodeCode GmbH # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License, version 3 # (only), as published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # This program is dual-licensed. If you wish to learn more about the # RhodeCode Enterprise Edition, including its added features, Support services, # and proprietary license terms, please see https://rhodecode.com/licenses/ """ GIT commit module """ import re import stat from itertools import chain from StringIO import StringIO from zope.cachedescriptors.property import Lazy as LazyProperty from rhodecode.lib.datelib import utcdate_fromtimestamp from rhodecode.lib.utils import safe_unicode, safe_str from rhodecode.lib.utils2 import safe_int from rhodecode.lib.vcs.conf import settings from rhodecode.lib.vcs.backends import base from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError from rhodecode.lib.vcs.nodes import ( FileNode, DirNode, NodeKind, RootNode, SubModuleNode, ChangedFileNodesGenerator, AddedFileNodesGenerator, RemovedFileNodesGenerator, LargeFileNode) from rhodecode.lib.vcs.compat import configparser class GitCommit(base.BaseCommit): """ Represents state of the repository at single commit id. """ _author_property = 'author' _committer_property = 'committer' _date_property = 'commit_time' _date_tz_property = 'commit_timezone' _message_property = 'message' _parents_property = 'parents' _filter_pre_load = [ # done through a more complex tree walk on parents "affected_files", # based on repository cached property "branch", # done through subprocess not remote call "children", # done through a more complex tree walk on parents "status", # mercurial specific property not supported here "_file_paths", # mercurial specific property not supported here 'obsolete', # mercurial specific property not supported here 'phase', # mercurial specific property not supported here 'hidden' ] def __init__(self, repository, raw_id, idx, pre_load=None): self.repository = repository self._remote = repository._remote # TODO: johbo: Tweak of raw_id should not be necessary self.raw_id = safe_str(raw_id) self.idx = idx self._set_bulk_properties(pre_load) # caches self._stat_modes = {} # stat info for paths self._paths = {} # path processed with parse_tree self.nodes = {} self._submodules = None def _set_bulk_properties(self, pre_load): if not pre_load: return pre_load = [entry for entry in pre_load if entry not in self._filter_pre_load] if not pre_load: return result = self._remote.bulk_request(self.raw_id, pre_load) for attr, value in result.items(): if attr in ["author", "message"]: if value: value = safe_unicode(value) elif attr == "date": value = utcdate_fromtimestamp(*value) elif attr == "parents": value = self._make_commits(value) self.__dict__[attr] = value @LazyProperty def _commit(self): return self._remote[self.raw_id] @LazyProperty def _tree_id(self): return self._remote[self._commit['tree']]['id'] @LazyProperty def id(self): return self.raw_id @LazyProperty def short_id(self): return self.raw_id[:12] @LazyProperty def message(self): return safe_unicode( self._remote.commit_attribute(self.id, self._message_property)) @LazyProperty def committer(self): return safe_unicode( self._remote.commit_attribute(self.id, self._committer_property)) @LazyProperty def author(self): return safe_unicode( self._remote.commit_attribute(self.id, self._author_property)) @LazyProperty def date(self): unix_ts, tz = self._remote.get_object_attrs( self.raw_id, self._date_property, self._date_tz_property) return utcdate_fromtimestamp(unix_ts, tz) @LazyProperty def status(self): """ Returns modified, added, removed, deleted files for current commit """ return self.changed, self.added, self.removed @LazyProperty def tags(self): tags = [safe_unicode(name) for name, commit_id in self.repository.tags.iteritems() if commit_id == self.raw_id] return tags @LazyProperty def branch(self): for name, commit_id in self.repository.branches.iteritems(): if commit_id == self.raw_id: return safe_unicode(name) return None def _get_id_for_path(self, path): path = safe_str(path) if path in self._paths: return self._paths[path] tree_id = self._tree_id path = path.strip('/') if path == '': data = [tree_id, "tree"] self._paths[''] = data return data parts = path.split('/') dirs, name = parts[:-1], parts[-1] cur_dir = '' # initially extract things from root dir tree_items = self._remote.tree_items(tree_id) self._process_tree_items(tree_items, cur_dir) for dir in dirs: if cur_dir: cur_dir = '/'.join((cur_dir, dir)) else: cur_dir = dir dir_id = None for item, stat_, id_, type_ in tree_items: if item == dir: dir_id = id_ break if dir_id: if type_ != "tree": raise CommitError('%s is not a directory' % cur_dir) # update tree tree_items = self._remote.tree_items(dir_id) else: raise CommitError('%s have not been found' % cur_dir) # cache all items from the given traversed tree self._process_tree_items(tree_items, cur_dir) if path not in self._paths: raise self.no_node_at_path(path) return self._paths[path] def _process_tree_items(self, items, cur_dir): for item, stat_, id_, type_ in items: if cur_dir: name = '/'.join((cur_dir, item)) else: name = item self._paths[name] = [id_, type_] self._stat_modes[name] = stat_ def _get_kind(self, path): path_id, type_ = self._get_id_for_path(path) if type_ == 'blob': return NodeKind.FILE elif type_ == 'tree': return NodeKind.DIR elif type == 'link': return NodeKind.SUBMODULE return None def _get_filectx(self, path): path = self._fix_path(path) if self._get_kind(path) != NodeKind.FILE: raise CommitError( "File does not exist for commit %s at '%s'" % (self.raw_id, path)) return path def _get_file_nodes(self): return chain(*(t[2] for t in self.walk())) @LazyProperty def parents(self): """ Returns list of parent commits. """ parent_ids = self._remote.commit_attribute( self.id, self._parents_property) return self._make_commits(parent_ids) @LazyProperty def children(self): """ Returns list of child commits. """ rev_filter = settings.GIT_REV_FILTER output, __ = self.repository.run_git_command( ['rev-list', '--children'] + rev_filter) child_ids = [] pat = re.compile(r'^%s' % self.raw_id) for l in output.splitlines(): if pat.match(l): found_ids = l.split(' ')[1:] child_ids.extend(found_ids) return self._make_commits(child_ids) def _make_commits(self, commit_ids, pre_load=None): return [ self.repository.get_commit(commit_id=commit_id, pre_load=pre_load) for commit_id in commit_ids] def get_file_mode(self, path): """ Returns stat mode of the file at the given `path`. """ path = safe_str(path) # ensure path is traversed self._get_id_for_path(path) return self._stat_modes[path] def is_link(self, path): return stat.S_ISLNK(self.get_file_mode(path)) def get_file_content(self, path): """ Returns content of the file at given `path`. """ id_, _ = self._get_id_for_path(path) return self._remote.blob_as_pretty_string(id_) def get_file_size(self, path): """ Returns size of the file at given `path`. """ id_, _ = self._get_id_for_path(path) return self._remote.blob_raw_length(id_) def get_path_history(self, path, limit=None, pre_load=None): """ Returns history of file as reversed list of `GitCommit` objects for which file at given `path` has been modified. TODO: This function now uses an underlying 'git' command which works quickly but ideally we should replace with an algorithm. """ self._get_filectx(path) f_path = safe_str(path) cmd = ['log'] if limit: cmd.extend(['-n', str(safe_int(limit, 0))]) cmd.extend(['--pretty=format: %H', '-s', self.raw_id, '--', f_path]) output, __ = self.repository.run_git_command(cmd) commit_ids = re.findall(r'[0-9a-fA-F]{40}', output) return [ self.repository.get_commit(commit_id=commit_id, pre_load=pre_load) for commit_id in commit_ids] def get_file_annotate(self, path, pre_load=None): """ Returns a generator of four element tuples with lineno, commit_id, commit lazy loader and line TODO: This function now uses os underlying 'git' command which is generally not good. Should be replaced with algorithm iterating commits. """ cmd = ['blame', '-l', '--root', '-r', self.raw_id, '--', path] # -l ==> outputs long shas (and we need all 40 characters) # --root ==> doesn't put '^' character for bounderies # -r commit_id ==> blames for the given commit output, __ = self.repository.run_git_command(cmd) for i, blame_line in enumerate(output.split('\n')[:-1]): line_no = i + 1 commit_id, line = re.split(r' ', blame_line, 1) yield ( line_no, commit_id, lambda: self.repository.get_commit(commit_id=commit_id, pre_load=pre_load), line) def get_nodes(self, path): if self._get_kind(path) != NodeKind.DIR: raise CommitError( "Directory does not exist for commit %s at " " '%s'" % (self.raw_id, path)) path = self._fix_path(path) id_, _ = self._get_id_for_path(path) tree_id = self._remote[id_]['id'] dirnodes = [] filenodes = [] alias = self.repository.alias for name, stat_, id_, type_ in self._remote.tree_items(tree_id): if type_ == 'link': url = self._get_submodule_url('/'.join((path, name))) dirnodes.append(SubModuleNode( name, url=url, commit=id_, alias=alias)) continue if path != '': obj_path = '/'.join((path, name)) else: obj_path = name if obj_path not in self._stat_modes: self._stat_modes[obj_path] = stat_ if type_ == 'tree': dirnodes.append(DirNode(obj_path, commit=self)) elif type_ == 'blob': filenodes.append(FileNode(obj_path, commit=self, mode=stat_)) else: raise CommitError( "Requested object should be Tree or Blob, is %s", type_) nodes = dirnodes + filenodes for node in nodes: if node.path not in self.nodes: self.nodes[node.path] = node nodes.sort() return nodes def get_node(self, path, pre_load=None): if isinstance(path, unicode): path = path.encode('utf-8') path = self._fix_path(path) if path not in self.nodes: try: id_, type_ = self._get_id_for_path(path) except CommitError: raise NodeDoesNotExistError( "Cannot find one of parents' directories for a given " "path: %s" % path) if type_ == 'link': url = self._get_submodule_url(path) node = SubModuleNode(path, url=url, commit=id_, alias=self.repository.alias) elif type_ == 'tree': if path == '': node = RootNode(commit=self) else: node = DirNode(path, commit=self) elif type_ == 'blob': node = FileNode(path, commit=self, pre_load=pre_load) else: raise self.no_node_at_path(path) # cache node self.nodes[path] = node return self.nodes[path] def get_largefile_node(self, path): id_, _ = self._get_id_for_path(path) pointer_spec = self._remote.is_large_file(id_) if pointer_spec: # content of that file regular FileNode is the hash of largefile file_id = pointer_spec.get('oid_hash') if self._remote.in_largefiles_store(file_id): lf_path = self._remote.store_path(file_id) return LargeFileNode(lf_path, commit=self, org_path=path) @LazyProperty def affected_files(self): """ Gets a fast accessible file changes for given commit """ added, modified, deleted = self._changes_cache return list(added.union(modified).union(deleted)) @LazyProperty def _changes_cache(self): added = set() modified = set() deleted = set() _r = self._remote parents = self.parents if not self.parents: parents = [base.EmptyCommit()] for parent in parents: if isinstance(parent, base.EmptyCommit): oid = None else: oid = parent.raw_id changes = _r.tree_changes(oid, self.raw_id) for (oldpath, newpath), (_, _), (_, _) in changes: if newpath and oldpath: modified.add(newpath) elif newpath and not oldpath: added.add(newpath) elif not newpath and oldpath: deleted.add(oldpath) return added, modified, deleted def _get_paths_for_status(self, status): """ Returns sorted list of paths for given ``status``. :param status: one of: *added*, *modified* or *deleted* """ added, modified, deleted = self._changes_cache return sorted({ 'added': list(added), 'modified': list(modified), 'deleted': list(deleted)}[status] ) @LazyProperty def added(self): """ Returns list of added ``FileNode`` objects. """ if not self.parents: return list(self._get_file_nodes()) return AddedFileNodesGenerator( [n for n in self._get_paths_for_status('added')], self) @LazyProperty def changed(self): """ Returns list of modified ``FileNode`` objects. """ if not self.parents: return [] return ChangedFileNodesGenerator( [n for n in self._get_paths_for_status('modified')], self) @LazyProperty def removed(self): """ Returns list of removed ``FileNode`` objects. """ if not self.parents: return [] return RemovedFileNodesGenerator( [n for n in self._get_paths_for_status('deleted')], self) def _get_submodule_url(self, submodule_path): git_modules_path = '.gitmodules' if self._submodules is None: self._submodules = {} try: submodules_node = self.get_node(git_modules_path) except NodeDoesNotExistError: return None content = submodules_node.content # ConfigParser fails if there are whitespaces content = '\n'.join(l.strip() for l in content.split('\n')) parser = configparser.ConfigParser() parser.readfp(StringIO(content)) for section in parser.sections(): path = parser.get(section, 'path') url = parser.get(section, 'url') if path and url: self._submodules[path.strip('/')] = url return self._submodules.get(submodule_path.strip('/'))