changeset.py
554 lines
| 19.2 KiB
| text/x-python
|
PythonLexer
r2007 | import re | |||
from itertools import chain | ||||
from dulwich import objects | ||||
from subprocess import Popen, PIPE | ||||
r3797 | ||||
Bradley M. Kuhn
|
r4186 | from kallithea.lib.vcs.conf import settings | ||
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset | ||||
from kallithea.lib.vcs.exceptions import ( | ||||
r3797 | RepositoryError, ChangesetError, NodeDoesNotExistError, VCSError, | |||
ChangesetDoesNotExistError, ImproperArchiveTypeError | ||||
) | ||||
Bradley M. Kuhn
|
r4186 | from kallithea.lib.vcs.nodes import ( | ||
r3797 | FileNode, DirNode, NodeKind, RootNode, RemovedFileNode, SubModuleNode, | |||
ChangedFileNodesGenerator, AddedFileNodesGenerator, RemovedFileNodesGenerator | ||||
) | ||||
Bradley M. Kuhn
|
r4186 | from kallithea.lib.vcs.utils import ( | ||
r3797 | safe_unicode, safe_str, safe_int, date_fromtimestamp | |||
) | ||||
Bradley M. Kuhn
|
r4186 | from kallithea.lib.vcs.utils.lazy import LazyProperty | ||
r2007 | ||||
class GitChangeset(BaseChangeset):
    """
    Represents state of the repository at single revision.

    The changeset wraps the object looked up in ``repository._repo`` and keeps
    three per-instance caches that are filled lazily:

    * ``nodes``       -- path -> node object (see ``get_node``/``get_nodes``)
    * ``_paths``      -- path -> object id (see ``_get_id_for_path``)
    * ``_stat_modes`` -- path -> stat mode of the tree entry
    """

    def __init__(self, repository, revision):
        """
        Looks up ``revision`` in ``repository._repo`` and initializes caches.

        If the object found is a tag, the id stored in ``commit.object[1]``
        is used as the revision and the object it names is loaded instead.

        :param repository: repository object providing ``_repo`` and
          ``revisions``.
        :param revision: id used as key into ``repository._repo``.
        :raise RepositoryError: if the lookup raises ``KeyError``.
        """
        self._stat_modes = {}
        self.repository = repository

        try:
            commit = self.repository._repo[revision]
            if isinstance(commit, objects.Tag):
                # only one level is peeled: the tag's target replaces both
                # the revision id and the commit object
                revision = commit.object[1]
                commit = self.repository._repo.get_object(commit.object[1])
        except KeyError:
            raise RepositoryError("Cannot get object with id %s" % revision)

        self.raw_id = revision
        self.id = self.raw_id
        self.short_id = self.raw_id[:12]
        self._commit = commit
        self._tree_id = commit.tree

        # names of the commit attributes read by the lazy properties below
        self._committer_property = 'committer'
        self._author_property = 'author'
        self._date_property = 'commit_time'
        self._date_tz_property = 'commit_timezone'

        # position of this changeset within ``repository.revisions``
        self.revision = repository.revisions.index(revision)

        self.nodes = {}
        self._paths = {}

    @LazyProperty
    def message(self):
        """
        Returns commit message passed through ``safe_unicode``.
        """
        return safe_unicode(self._commit.message)

    @LazyProperty
    def committer(self):
        """
        Returns committer attribute of the commit passed through
        ``safe_unicode``.
        """
        return safe_unicode(getattr(self._commit, self._committer_property))

    @LazyProperty
    def author(self):
        """
        Returns author attribute of the commit passed through
        ``safe_unicode``.
        """
        return safe_unicode(getattr(self._commit, self._author_property))

    @LazyProperty
    def date(self):
        """
        Returns result of ``date_fromtimestamp`` for the commit's time and
        timezone attributes.
        """
        return date_fromtimestamp(
            getattr(self._commit, self._date_property),
            getattr(self._commit, self._date_tz_property))

    @LazyProperty
    def _timestamp(self):
        """
        Returns the raw commit time attribute of the commit.
        """
        return getattr(self._commit, self._date_property)

    @LazyProperty
    def status(self):
        """
        Returns three element tuple of ``changed``, ``added`` and ``removed``
        files for current changeset.
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def tags(self):
        """
        Returns list of tag names from ``repository.tags`` whose value equals
        this changeset's ``raw_id``.
        """
        return [tname for tname, tsha in self.repository.tags.iteritems()
                if tsha == self.raw_id]

    @LazyProperty
    def branch(self):
        """
        Returns the ref found for this changeset's ``raw_id`` in
        ``repository._heads(reverse=False)`` (as unicode), or ``None`` if
        there is no such entry.
        """
        heads = self.repository._heads(reverse=False)
        ref = heads.get(self.raw_id)
        if ref:
            return safe_unicode(ref)
        return None

    def _fix_path(self, path):
        """
        Paths are stored without trailing slash so we need to get rid of it
        if needed.
        """
        return path.rstrip('/')

    def _cache_tree_entries(self, tree, curdir):
        """
        Records object id and stat mode of every direct entry of ``tree`` in
        ``_paths`` and ``_stat_modes``, keyed by path relative to the root.

        :param tree: tree object whose entries should be cached.
        :param curdir: path of ``tree`` itself; empty string for root tree.
        """
        for item, stat, sha in tree.iteritems():
            name = '/'.join((curdir, item)) if curdir else item
            self._paths[name] = sha
            self._stat_modes[name] = stat

    def _get_id_for_path(self, path):
        """
        Returns object id for the given ``path``, walking the tree from the
        root on a cache miss and caching every entry of each visited tree.

        :raise ChangesetError: if one of the parent directories is missing
          or is not a tree.
        :raise NodeDoesNotExistError: if nothing exists at ``path``.
        """
        path = safe_str(path)
        if path not in self._paths:
            # cache keys never carry leading/trailing slashes
            path = path.strip('/')
            # set root tree
            tree = self.repository._repo[self._tree_id]
            if path == '':
                self._paths[''] = tree.id
                return tree.id

            parent_dirs = path.split('/')[:-1]
            curdir = ''

            # initially extract things from root dir
            self._cache_tree_entries(tree, curdir)

            for dirname in parent_dirs:
                curdir = '/'.join((curdir, dirname)) if curdir else dirname
                dir_id = None
                for item, stat, sha in tree.iteritems():
                    if dirname == item:
                        dir_id = sha
                        break
                if not dir_id:
                    raise ChangesetError('%s have not been found' % curdir)

                # descend into the matched entry
                tree = self.repository._repo[dir_id]
                if not isinstance(tree, objects.Tree):
                    raise ChangesetError('%s is not a directory' % curdir)

                # cache all items from the given traversed tree
                self._cache_tree_entries(tree, curdir)

            if path not in self._paths:
                raise NodeDoesNotExistError("There is no file nor directory "
                    "at the given path '%s' at revision %s"
                    % (path, safe_str(self.short_id)))
        return self._paths[path]

    def _get_kind(self, path):
        """
        Returns ``NodeKind.FILE`` for a blob, ``NodeKind.DIR`` for a tree and
        ``None`` for any other kind of object found at ``path``.
        """
        obj = self.repository._repo[self._get_id_for_path(path)]
        if isinstance(obj, objects.Blob):
            return NodeKind.FILE
        if isinstance(obj, objects.Tree):
            return NodeKind.DIR
        return None

    def _get_filectx(self, path):
        """
        Returns ``path`` without trailing slash after checking that it points
        to a file.

        :raise ChangesetError: if object at ``path`` is not a file.
        """
        path = self._fix_path(path)
        if self._get_kind(path) != NodeKind.FILE:
            raise ChangesetError("File does not exist for revision %s at "
                " '%s'" % (self.raw_id, path))
        return path

    def _get_file_nodes(self):
        """
        Returns iterator chaining the third element of every tuple yielded
        by ``self.walk()``.
        """
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parents changesets.
        """
        return [self.repository.get_changeset(parent)
                for parent in self._commit.parents]

    @LazyProperty
    def children(self):
        """
        Returns list of children changesets.

        Runs ``git rev-list <settings.GIT_REV_FILTER> --children`` and
        collects the ids following this changeset's ``raw_id`` on every
        output line that starts with it.
        """
        rev_filter = settings.GIT_REV_FILTER
        so, se = self.repository.run_git_command(
            "rev-list %s --children" % (rev_filter)
        )

        child_ids = []
        for line in so.splitlines():
            if line.startswith(self.raw_id):
                child_ids.extend(line.split(' ')[1:])
        return [self.repository.get_changeset(cs) for cs in child_ids]

    def next(self, branch=None):
        """
        Returns the changeset following this one in ``repository.revisions``.

        :param branch: if given, changesets whose ``branch`` differs are
          skipped.
        :raise VCSError: if ``branch`` is given and differs from this
          changeset's branch.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # iterate instead of recursing so long runs of skipped changesets
        # cannot exhaust the recursion limit
        cs = self
        while True:
            try:
                next_rev = cs.repository.revisions[cs.revision + 1]
            except IndexError:
                raise ChangesetDoesNotExistError
            cs = cs.repository.get_changeset(next_rev)
            if not branch or branch == cs.branch:
                return cs

    def prev(self, branch=None):
        """
        Returns the changeset preceding this one in ``repository.revisions``.

        :param branch: if given, changesets whose ``branch`` differs are
          skipped.
        :raise VCSError: if ``branch`` is given and differs from this
          changeset's branch.
        :raise ChangesetDoesNotExistError: if there is no such changeset.
        """
        if branch and self.branch != branch:
            raise VCSError('Branch option used on changeset not belonging '
                           'to that branch')

        # iterate instead of recursing so long runs of skipped changesets
        # cannot exhaust the recursion limit
        cs = self
        while True:
            prev_ = cs.revision - 1
            # a negative index would silently wrap around to the end
            if prev_ < 0:
                raise ChangesetDoesNotExistError
            try:
                prev_rev = cs.repository.revisions[prev_]
            except IndexError:
                raise ChangesetDoesNotExistError
            cs = cs.repository.get_changeset(prev_rev)
            if not branch or branch == cs.branch:
                return cs

    def diff(self, ignore_whitespace=True, context=3):
        """
        Returns joined output of ``repository.get_diff`` between the first
        parent (or ``repository.EMPTY_CHANGESET`` if there are no parents)
        and this changeset.
        """
        if self.parents:
            rev1 = self.parents[0]
        else:
            rev1 = self.repository.EMPTY_CHANGESET
        rev2 = self
        return ''.join(self.repository.get_diff(
            rev1, rev2,
            ignore_whitespace=ignore_whitespace,
            context=context))

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.
        """
        path = safe_str(path)
        # ensure path is traversed
        self._get_id_for_path(path)
        # modes are cached under the slash-stripped path, so look up the
        # same normalized key the traversal used
        return self._stat_modes[path.strip('/')]

    def get_file_content(self, path):
        """
        Returns content of the file at given ``path``.
        """
        blob_id = self._get_id_for_path(path)
        blob = self.repository._repo[blob_id]
        return blob.as_pretty_string()

    def get_file_size(self, path):
        """
        Returns size of the file at given ``path``.
        """
        blob_id = self._get_id_for_path(path)
        blob = self.repository._repo[blob_id]
        return blob.raw_length()

    def get_file_changeset(self, path):
        """
        Returns last commit of the file at the given ``path``.
        """
        return self.get_file_history(path, limit=1)[0]

    def get_file_history(self, path, limit=None):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.

        :param limit: if truthy, passed to ``git log -n`` (through
          ``safe_int`` with default 0).
        :raise ChangesetError: if ``path`` does not point to a file.

        TODO: This function now uses os underlying 'git' and 'grep' commands
        which is generally not good. Should be replaced with algorithm
        iterating commits.
        """
        # called only for its check that path is a file
        self._get_filectx(path)
        cs_id = safe_str(self.id)
        f_path = safe_str(path)

        if limit:
            cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % (
                safe_int(limit, 0), cs_id, f_path)
        else:
            cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % (
                cs_id, f_path)
        so, se = self.repository.run_git_command(cmd)
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
        return [self.repository.get_changeset(sha) for sha in ids]

    def get_file_history_2(self, path):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.

        :raise ChangesetError: if ``path`` does not point to a file.
        """
        # NOTE(review): the walker is created with max_entries=1, which looks
        # at odds with "history" in the description above -- confirm intent.
        self._get_filectx(path)
        from dulwich.walk import Walker
        include = [self.id]
        walker = Walker(self.repository._repo.object_store, include,
                        paths=[path], max_entries=1)
        return [self.repository.get_changeset(sha)
                for sha in (x.commit.id for x in walker)]

    def get_file_annotate(self, path):
        """
        Returns a generator of four element tuples with
        lineno, sha, changeset lazy loader and line

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
        # -l     ==> outputs long shas (and we need all 40 characters)
        # --root ==> doesn't put '^' character for boundaries
        # -r sha ==> blames for the given revision
        so, se = self.repository.run_git_command(cmd)

        # the slice drops whatever follows the final newline of the output
        for ln_no, blame_line in enumerate(so.split('\n')[:-1], 1):
            sha, line = blame_line.split(' ', 1)
            # bind sha as a default argument: a plain closure would see the
            # value from whichever line the generator reached last, so
            # loaders called after further iteration would be wrong
            yield (ln_no, sha,
                   lambda sha=sha: self.repository.get_changeset(sha),
                   line)

    def fill_archive(self, stream=None, kind='tgz', prefix=None,
                     subrepos=False):
        """
        Fills up given stream.

        :param stream: file like object.
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
            Default: ``tgz``.
        :param prefix: name of root directory in archive.
            Default is repository name and changeset's short_id joined with
            dash.
        :param subrepos: include subrepos in this archive (not read by this
            implementation).

        :raise ImproperArchiveTypeError: If given kind is wrong.
        :raise VCSError: If given stream is None, or prefix is empty or
            starts with a slash.
        """
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
        if kind not in allowed_kinds:
            # interpolate the list into the message instead of passing it as
            # a second exception argument
            raise ImproperArchiveTypeError(
                'Archive kind not supported use one of %s'
                % (allowed_kinds,))

        if prefix is None:
            prefix = '%s-%s' % (self.repository.name, self.short_id)
        elif prefix.startswith('/'):
            raise VCSError("Prefix cannot start with leading slash")
        elif prefix.strip() == '':
            raise VCSError("Prefix cannot be empty")

        frmt = 'zip' if kind == 'zip' else 'tar'
        _git_path = settings.GIT_EXECUTABLE_PATH
        # NOTE(review): the command is run through the shell and ``prefix``
        # is interpolated unquoted -- callers must not pass untrusted text.
        cmd = '%s archive --format=%s --prefix=%s/ %s' % (
            _git_path, frmt, prefix, self.raw_id)
        if kind == 'tgz':
            cmd += ' | gzip -9'
        elif kind == 'tbz2':
            cmd += ' | bzip2 -9'

        if stream is None:
            raise VCSError('You need to pass in a valid stream for filling'
                           ' with archival data')
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
                      cwd=self.repository.path)

        buffer_size = 1024 * 8
        chunk = popen.stdout.read(buffer_size)
        while chunk:
            stream.write(chunk)
            chunk = popen.stdout.read(buffer_size)
        # Make sure all descriptors would be read
        popen.communicate()

    def get_nodes(self, path):
        """
        Returns sorted list of nodes (directories and submodules first, then
        files, before sorting) found directly under the directory at
        ``path``. Nodes not yet present in ``self.nodes`` are cached there.

        :raise ChangesetError: if ``path`` is not a directory or an entry is
          neither tree nor blob.
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise ChangesetError("Directory does not exist for revision %s at "
                " '%s'" % (self.revision, path))
        path = self._fix_path(path)
        tree = self.repository._repo[self._get_id_for_path(path)]
        dirnodes = []
        filenodes = []
        als = self.repository.alias
        for name, stat, sha in tree.iteritems():
            if objects.S_ISGITLINK(stat):
                # gitlink entries are reported as submodules and never
                # looked up in the object store
                dirnodes.append(SubModuleNode(name, url=None, changeset=sha,
                                              alias=als))
                continue

            obj = self.repository._repo.get_object(sha)
            obj_path = '/'.join((path, name)) if path != '' else name
            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat
            if isinstance(obj, objects.Tree):
                dirnodes.append(DirNode(obj_path, changeset=self))
            elif isinstance(obj, objects.Blob):
                filenodes.append(FileNode(obj_path, changeset=self,
                                          mode=stat))
            else:
                raise ChangesetError("Requested object should be Tree "
                                     "or Blob, is %r" % type(obj))
        nodes = dirnodes + filenodes
        for node in nodes:
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path):
        """
        Returns node for the given ``path``, creating and caching it in
        ``self.nodes`` on first access.

        :raise NodeDoesNotExistError: if a parent directory cannot be
          resolved or the object at ``path`` is neither tree nor blob.
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)
        if path not in self.nodes:
            try:
                id_ = self._get_id_for_path(path)
            except ChangesetError:
                raise NodeDoesNotExistError("Cannot find one of parents' "
                    "directories for a given path: %s" % path)

            mode = self._stat_modes.get(path)
            if mode and objects.S_ISGITLINK(mode):
                node = SubModuleNode(path, url=None, changeset=id_,
                                     alias=self.repository.alias)
            else:
                obj = self.repository._repo.get_object(id_)

                if isinstance(obj, objects.Tree):
                    if path == '':
                        node = RootNode(changeset=self)
                    else:
                        node = DirNode(path, changeset=self)
                    node._tree = obj
                elif isinstance(obj, objects.Blob):
                    node = FileNode(path, changeset=self)
                    node._blob = obj
                else:
                    raise NodeDoesNotExistError("There is no file nor directory "
                        "at the given path '%s' at revision %s"
                        % (path, self.short_id))
            # cache node
            self.nodes[path] = node
        return self.nodes[path]

    @LazyProperty
    def affected_files(self):
        """
        Gets a fast accessible file changes for given changeset
        """
        added, modified, deleted = self._changes_cache
        return list(added | modified | deleted)

    @LazyProperty
    def _diff_name_status(self):
        """
        Returns newline-joined, stripped output of
        ``git diff --name-status`` against every parent.
        """
        output = []
        for parent in self.parents:
            cmd = 'diff --name-status %s %s --encoding=utf8' % (
                parent.raw_id, self.raw_id)
            so, se = self.repository.run_git_command(cmd)
            output.append(so.strip())
        return '\n'.join(output)

    @LazyProperty
    def _changes_cache(self):
        """
        Returns three sets of paths: added, modified and deleted, collected
        from ``object_store.tree_changes`` between every parent's tree and
        this changeset's tree. Without parents the comparison is made
        against no tree at all (``None``).
        """
        added = set()
        modified = set()
        deleted = set()
        _r = self.repository._repo

        parents = self.parents or [EmptyChangeset()]
        # same for every parent, so look it up once
        new_tree = _r[self.raw_id].tree
        for parent in parents:
            if isinstance(parent, EmptyChangeset):
                old_tree = None
            else:
                old_tree = _r[parent.raw_id].tree
            changes = _r.object_store.tree_changes(old_tree, new_tree)
            for (oldpath, newpath), _modes, _shas in changes:
                if newpath and oldpath:
                    modified.add(newpath)
                elif newpath and not oldpath:
                    added.add(newpath)
                elif not newpath and oldpath:
                    deleted.add(oldpath)
        return added, modified, deleted

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        added, modified, deleted = self._changes_cache
        by_status = {
            'added': added,
            'modified': modified,
            'deleted': deleted,
        }
        return sorted(by_status[status])

    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        if not self.parents:
            # without parents every file of the changeset counts as added
            return list(self._get_file_nodes())
        return AddedFileNodesGenerator(
            self._get_paths_for_status('added'), self)

    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return ChangedFileNodesGenerator(
            self._get_paths_for_status('modified'), self)

    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        if not self.parents:
            return []
        return RemovedFileNodesGenerator(
            self._get_paths_for_status('deleted'), self)