# -*- coding: utf-8 -*-

# Copyright (C) 2014-2018 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

"""
GIT commit module
"""

import re
import stat
from itertools import chain
from StringIO import StringIO

from zope.cachedescriptors.property import Lazy as LazyProperty

from rhodecode.lib.datelib import utcdate_fromtimestamp
from rhodecode.lib.utils import safe_unicode, safe_str
from rhodecode.lib.utils2 import safe_int
from rhodecode.lib.vcs.conf import settings
from rhodecode.lib.vcs.backends import base
from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError
from rhodecode.lib.vcs.nodes import (
    FileNode, DirNode, NodeKind, RootNode, SubModuleNode,
    ChangedFileNodesGenerator, AddedFileNodesGenerator,
    RemovedFileNodesGenerator, LargeFileNode)
from rhodecode.lib.vcs.compat import configparser


class GitCommit(base.BaseCommit):
    """
    Represents state of the repository at single commit id.

    Commit data is read through ``repository._remote``. The ``_*_property``
    attributes below hold the attribute names that are handed to the remote
    when the matching piece of commit data is requested.
    """
    # attribute names passed to the remote when commit data is fetched
    _author_property = 'author'
    _committer_property = 'committer'
    _date_property = 'commit_time'
    _date_tz_property = 'commit_timezone'
    _message_property = 'message'
    _parents_property = 'parents'

    # names removed from a ``pre_load`` list before the bulk request is made
    _filter_pre_load = [
        # done through a more complex tree walk on parents
        "affected_files",
        # based on repository cached property
        "branch",
        # done through subprocess not remote call
        "children",
        # done through a more complex tree walk on parents
        "status",
        # mercurial specific property not supported here
        "_file_paths",
        # mercurial specific property not supported here
        'obsolete',
        # mercurial specific property not supported here
        'phase',
        # mercurial specific property not supported here
        'hidden'
    ]

    def __init__(self, repository, raw_id, idx, pre_load=None):
        """
        Bind the commit to ``repository`` and optionally pre-load attributes.

        :param repository: repository object; its ``_remote`` is reused here
        :param raw_id: commit id, stored after conversion with ``safe_str``
        :param idx: stored unchanged as ``self.idx``
        :param pre_load: optional list of attribute names to fetch in bulk
        """
        self.repository = repository
        self._remote = repository._remote
        # TODO: johbo: Tweak of raw_id should not be necessary
        self.raw_id = safe_str(raw_id)
        self.idx = idx

        # needs ``_remote`` and ``raw_id``, so it must run after they are set
        self._set_bulk_properties(pre_load)

        # caches
        self._stat_modes = {}  # stat info for paths
        self._paths = {}  # path processed with parse_tree
        self.nodes = {}
        # ``None`` means ".gitmodules has not been parsed yet"
        self._submodules = None

    def _set_bulk_properties(self, pre_load):
        if not pre_load:
            return
        pre_load = [entry for entry in pre_load
                    if entry not in self._filter_pre_load]
        if not pre_load:
            return

        result = self._remote.bulk_request(self.raw_id, pre_load)
        for attr, value in result.items():
            if attr in ["author", "message"]:
                if value:
                    value = safe_unicode(value)
            elif attr == "date":
                value = utcdate_fromtimestamp(*value)
            elif attr == "parents":
                value = self._make_commits(value)
            self.__dict__[attr] = value

    @LazyProperty
    def _commit(self):
        """
        Object returned by the remote when indexed with ``raw_id``.
        """
        return self._remote[self.raw_id]

    @LazyProperty
    def _tree_id(self):
        """
        Id of the tree object referenced by this commit.
        """
        tree_ref = self._commit['tree']
        tree_obj = self._remote[tree_ref]
        return tree_obj['id']

    @LazyProperty
    def id(self):
        """
        Same value as ``raw_id``.
        """
        return self.raw_id

    @LazyProperty
    def short_id(self):
        """
        First 12 characters of ``raw_id``.
        """
        return self.raw_id[:12]

    @LazyProperty
    def message(self):
        """
        Commit message fetched from the remote, converted with
        ``safe_unicode``.
        """
        raw_message = self._remote.commit_attribute(
            self.id, self._message_property)
        return safe_unicode(raw_message)

    @LazyProperty
    def committer(self):
        """
        Committer fetched from the remote, converted with ``safe_unicode``.
        """
        raw_committer = self._remote.commit_attribute(
            self.id, self._committer_property)
        return safe_unicode(raw_committer)

    @LazyProperty
    def author(self):
        """
        Author fetched from the remote, converted with ``safe_unicode``.
        """
        raw_author = self._remote.commit_attribute(
            self.id, self._author_property)
        return safe_unicode(raw_author)

    @LazyProperty
    def date(self):
        """
        Commit date, built by ``utcdate_fromtimestamp`` from the commit time
        and commit timezone attributes read from the remote.
        """
        date_attrs = self._remote.get_object_attrs(
            self.raw_id, self._date_property, self._date_tz_property)
        unix_ts, tz = date_attrs
        return utcdate_fromtimestamp(unix_ts, tz)

    @LazyProperty
    def status(self):
        """
        Returns a three element tuple ``(changed, added, removed)`` built
        from the properties of the same names for the current commit.
        """
        return self.changed, self.added, self.removed

    @LazyProperty
    def tags(self):
        """
        Names of all repository tags whose commit id equals ``raw_id``.
        """
        matching = []
        for name, commit_id in self.repository.tags.iteritems():
            if commit_id == self.raw_id:
                matching.append(safe_unicode(name))
        return matching

    @LazyProperty
    def branch(self):
        """
        Name of the first repository branch whose commit id equals
        ``raw_id``, or ``None`` when no branch matches.
        """
        own_branches = (
            safe_unicode(name)
            for name, commit_id in self.repository.branches.iteritems()
            if commit_id == self.raw_id)
        return next(own_branches, None)

    def _get_id_for_path(self, path):
        """
        Resolve ``path`` inside this commit's tree.

        Every tree listed on the way is stored in ``self._paths`` and
        ``self._stat_modes`` (see `_process_tree_items`), so later lookups
        below the same directories are served from the cache.

        :param path: path relative to the repository root; surrounding
            slashes are ignored and an empty path means the root tree
        :return: two element list ``[object_id, type]``
        :raises CommitError: when an intermediate segment is missing or is
            not a tree
        :raises: the exception built by ``self.no_node_at_path`` when the
            last segment is not present in its parent tree
        """
        path = safe_str(path)
        if path in self._paths:
            return self._paths[path]

        tree_id = self._tree_id

        path = path.strip('/')
        if path == '':
            data = [tree_id, "tree"]
            self._paths[''] = data
            return data

        # cache keys never carry surrounding slashes, so retry the lookup
        # with the normalized path before asking the remote again
        if path in self._paths:
            return self._paths[path]

        dir_names = path.split('/')[:-1]
        cur_dir = ''

        # initially extract things from root dir
        tree_items = self._remote.tree_items(tree_id)
        self._process_tree_items(tree_items, cur_dir)

        for dir_name in dir_names:
            cur_dir = '/'.join((cur_dir, dir_name)) if cur_dir else dir_name

            # remember id and type of the matched entry explicitly instead
            # of relying on the loop variables surviving the loop
            dir_id = dir_type = None
            for item, _stat, id_, type_ in tree_items:
                if item == dir_name:
                    dir_id, dir_type = id_, type_
                    break

            if not dir_id:
                raise CommitError('%s have not been found' % cur_dir)
            if dir_type != "tree":
                raise CommitError('%s is not a directory' % cur_dir)

            # descend and cache all items from the given traversed tree
            tree_items = self._remote.tree_items(dir_id)
            self._process_tree_items(tree_items, cur_dir)

        if path not in self._paths:
            raise self.no_node_at_path(path)

        return self._paths[path]

    def _process_tree_items(self, items, cur_dir):
        for item, stat_, id_, type_ in items:
            if cur_dir:
                name = '/'.join((cur_dir, item))
            else:
                name = item
            self._paths[name] = [id_, type_]
            self._stat_modes[name] = stat_

    def _get_kind(self, path):
        """
        Map the object type found at ``path`` to a ``NodeKind`` value.

        :return: ``NodeKind.FILE`` for a blob, ``NodeKind.DIR`` for a tree,
            ``NodeKind.SUBMODULE`` for a link and ``None`` for anything else
        """
        _path_id, type_ = self._get_id_for_path(path)
        if type_ == 'blob':
            return NodeKind.FILE
        if type_ == 'tree':
            return NodeKind.DIR
        # compare the resolved ``type_``, not the ``type`` builtin
        if type_ == 'link':
            return NodeKind.SUBMODULE
        return None

    def _get_filectx(self, path):
        """
        Return ``path`` after ``_fix_path`` when it points to a file.

        :raises CommitError: when the path does not resolve to a file
        """
        fixed_path = self._fix_path(path)
        if self._get_kind(fixed_path) == NodeKind.FILE:
            return fixed_path
        raise CommitError(
            "File does not exist for commit %s at  '%s'" %
            (self.raw_id, fixed_path))

    def _get_file_nodes(self):
        return chain(*(t[2] for t in self.walk()))

    @LazyProperty
    def parents(self):
        """
        Returns list of parent commits, created from the parent ids reported
        by the remote.
        """
        remote = self._remote
        parent_ids = remote.commit_attribute(self.id, self._parents_property)
        return self._make_commits(parent_ids)

    @LazyProperty
    def children(self):
        """
        Returns list of child commits.

        Runs ``git rev-list --children`` with the configured revision filter
        and collects the ids listed after this commit's id on every output
        line that starts with it.
        """
        cmd = ['rev-list', '--children'] + settings.GIT_REV_FILTER
        output, __ = self.repository.run_git_command(cmd)

        starts_with_own_id = re.compile(r'^%s' % self.raw_id).match
        child_ids = []
        for line in output.splitlines():
            if starts_with_own_id(line):
                # first field is this commit, the rest are its children
                child_ids.extend(line.split(' ')[1:])
        return self._make_commits(child_ids)

    def _make_commits(self, commit_ids, pre_load=None):
        return [
            self.repository.get_commit(commit_id=commit_id, pre_load=pre_load)
            for commit_id in commit_ids]

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given `path`.

        :param path: path relative to the repository root; surrounding
            slashes are ignored
        :raises KeyError: when no stat mode is cached for the path, which is
            the case for the root path
        """
        path = safe_str(path)
        # ensure path is traversed
        self._get_id_for_path(path)
        # the traversal stores stat modes under slash-stripped keys, so the
        # lookup has to use the same normalized form
        return self._stat_modes[path.strip('/')]

    def is_link(self, path):
        return stat.S_ISLNK(self.get_file_mode(path))

    def get_file_content(self, path):
        """
        Returns content of the file at given `path`.
        """
        id_, _ = self._get_id_for_path(path)
        return self._remote.blob_as_pretty_string(id_)

    def get_file_size(self, path):
        """
        Returns size of the file at given `path`.
        """
        id_, _ = self._get_id_for_path(path)
        return self._remote.blob_raw_length(id_)

    def get_file_history(self, path, limit=None, pre_load=None):
        """
        Returns history of file as reversed list of `GitCommit` objects for
        which file at given `path` has been modified.

        :param path: path of a file in this commit; anything else makes
            ``_get_filectx`` raise
        :param limit: optional maximum number of commits, passed as ``-n``
        :param pre_load: passed through to ``repository.get_commit``

        TODO: This function now uses an underlying 'git' command which works
        quickly but ideally we should replace with an algorithm.
        """
        # only used as a check that ``path`` is a file in this commit
        self._get_filectx(path)
        f_path = safe_str(path)

        limit_args = ['-n', str(safe_int(limit, 0))] if limit else []
        cmd = ['log'] + limit_args + [
            '--pretty=format: %H', '-s', self.raw_id, '--', f_path]

        output, __ = self.repository.run_git_command(cmd)
        # every 40 character hex string in the output is taken as an id
        commit_ids = re.findall(r'[0-9a-fA-F]{40}', output)

        history = []
        for commit_id in commit_ids:
            history.append(self.repository.get_commit(
                commit_id=commit_id, pre_load=pre_load))
        return history

    # TODO: unused for now potential replacement for subprocess
    def get_file_history_2(self, path, limit=None, pre_load=None):
        """
        Returns history of file as reversed list of `Commit` objects for
        which file at given `path` has been modified.

        The commit ids come from the remote's ``get_file_history`` call
        instead of a git subprocess.
        """
        # only used as a check that ``path`` is a file in this commit
        self._get_filectx(path)
        f_path = safe_str(path)

        commit_ids = self._remote.get_file_history(f_path, self.id, limit)

        history = []
        for commit_id in commit_ids:
            history.append(self.repository.get_commit(
                commit_id=commit_id, pre_load=pre_load))
        return history

    def get_file_annotate(self, path, pre_load=None):
        """
        Returns a generator of four element tuples with
            lineno, commit_id, commit lazy loader and line

        TODO: This function now uses os underlying 'git' command which is
        generally not good. Should be replaced with algorithm iterating
        commits.
        """
        cmd = ['blame', '-l', '--root', '-r', self.raw_id, '--', path]
        # -l     ==> outputs long shas (and we need all 40 characters)
        # --root ==> doesn't put '^' character for bounderies
        # -r commit_id ==> blames for the given commit
        output, __ = self.repository.run_git_command(cmd)

        for i, blame_line in enumerate(output.split('\n')[:-1]):
            line_no = i + 1
            commit_id, line = re.split(r' ', blame_line, 1)
            yield (
                line_no, commit_id,
                lambda: self.repository.get_commit(commit_id=commit_id,
                                                   pre_load=pre_load),
                line)

    def get_nodes(self, path):
        """
        Returns the nodes found directly inside the directory at ``path``.

        Submodule entries become ``SubModuleNode``, trees ``DirNode`` and
        blobs ``FileNode``. Nodes not yet present in ``self.nodes`` are
        added to that cache and the result is returned sorted.

        :raises CommitError: when ``path`` is not a directory in this commit
            or when an entry is neither a link, a tree nor a blob
        """
        if self._get_kind(path) != NodeKind.DIR:
            raise CommitError(
                "Directory does not exist for commit %s at "
                " '%s'" % (self.raw_id, path))
        path = self._fix_path(path)
        id_, _ = self._get_id_for_path(path)
        tree_id = self._remote[id_]['id']
        dirnodes = []
        filenodes = []
        alias = self.repository.alias
        for name, stat_, id_, type_ in self._remote.tree_items(tree_id):
            if type_ == 'link':
                url = self._get_submodule_url('/'.join((path, name)))
                dirnodes.append(SubModuleNode(
                    name, url=url, commit=id_, alias=alias))
                continue

            if path != '':
                obj_path = '/'.join((path, name))
            else:
                obj_path = name
            if obj_path not in self._stat_modes:
                self._stat_modes[obj_path] = stat_

            if type_ == 'tree':
                dirnodes.append(DirNode(obj_path, commit=self))
            elif type_ == 'blob':
                filenodes.append(FileNode(obj_path, commit=self, mode=stat_))
            else:
                # format the type into the message instead of passing it as
                # a second exception argument
                raise CommitError(
                    "Requested object should be Tree or Blob, is %s" % type_)

        nodes = dirnodes + filenodes
        for node in nodes:
            if node.path not in self.nodes:
                self.nodes[node.path] = node
        nodes.sort()
        return nodes

    def get_node(self, path, pre_load=None):
        """
        Returns the node at ``path``, creating and caching it in
        ``self.nodes`` on first access.

        :param path: node path; unicode input is encoded to utf-8 first
        :param pre_load: only forwarded when a ``FileNode`` is created
        :raises NodeDoesNotExistError: when resolving the path raises
            ``CommitError``
        """
        if isinstance(path, unicode):
            path = path.encode('utf-8')
        path = self._fix_path(path)

        if path in self.nodes:
            return self.nodes[path]

        try:
            id_, type_ = self._get_id_for_path(path)
        except CommitError:
            raise NodeDoesNotExistError(
                "Cannot find one of parents' directories for a given "
                "path: %s" % path)

        if type_ == 'link':
            node = SubModuleNode(
                path, url=self._get_submodule_url(path), commit=id_,
                alias=self.repository.alias)
        elif type_ == 'tree':
            # the empty path is the repository root
            node = DirNode(path, commit=self) if path != '' \
                else RootNode(commit=self)
        elif type_ == 'blob':
            node = FileNode(path, commit=self, pre_load=pre_load)
        else:
            raise self.no_node_at_path(path)

        # cache node
        self.nodes[path] = node
        return node

    def get_largefile_node(self, path):
        id_, _ = self._get_id_for_path(path)
        pointer_spec = self._remote.is_large_file(id_)

        if pointer_spec:
            # content of that file regular FileNode is the hash of largefile
            file_id = pointer_spec.get('oid_hash')
            if self._remote.in_largefiles_store(file_id):
                lf_path = self._remote.store_path(file_id)
                return LargeFileNode(lf_path, commit=self, org_path=path)

    @LazyProperty
    def affected_files(self):
        """
        List of all paths that are added, modified or deleted by this
        commit, taken from ``_changes_cache``.
        """
        added, modified, deleted = self._changes_cache
        touched = added | modified | deleted
        return list(touched)

    @LazyProperty
    def _changes_cache(self):
        """
        Three sets of paths ``(added, modified, deleted)`` collected from
        the tree changes between every parent and this commit.

        A commit without parents is compared against ``None`` as old id.
        """
        added, modified, deleted = set(), set(), set()

        parents = self.parents or [base.EmptyCommit()]
        for parent in parents:
            if isinstance(parent, base.EmptyCommit):
                old_id = None
            else:
                old_id = parent.raw_id

            changes = self._remote.tree_changes(old_id, self.raw_id)
            for (oldpath, newpath), (_, _), (_, _) in changes:
                if newpath:
                    # present on the new side: modified when it also existed
                    # before, added otherwise
                    target = modified if oldpath else added
                    target.add(newpath)
                elif oldpath:
                    deleted.add(oldpath)
        return added, modified, deleted

    def _get_paths_for_status(self, status):
        """
        Returns sorted list of paths for given ``status``.

        :param status: one of: *added*, *modified* or *deleted*
        """
        added, modified, deleted = self._changes_cache
        return sorted({
            'added': list(added),
            'modified': list(modified),
            'deleted': list(deleted)}[status]
        )

    @LazyProperty
    def added(self):
        """
        Returns the added file nodes.

        For a commit without parents this is a list of all file nodes,
        otherwise an ``AddedFileNodesGenerator`` over the added paths.
        """
        if self.parents:
            added_paths = self._get_paths_for_status('added')
            return AddedFileNodesGenerator(list(added_paths), self)
        return list(self._get_file_nodes())

    @LazyProperty
    def changed(self):
        """
        Returns the modified file nodes.

        For a commit without parents this is an empty list, otherwise a
        ``ChangedFileNodesGenerator`` over the modified paths.
        """
        if self.parents:
            modified_paths = self._get_paths_for_status('modified')
            return ChangedFileNodesGenerator(list(modified_paths), self)
        return []

    @LazyProperty
    def removed(self):
        """
        Returns the removed file nodes.

        For a commit without parents this is an empty list, otherwise a
        ``RemovedFileNodesGenerator`` over the deleted paths.
        """
        if self.parents:
            deleted_paths = self._get_paths_for_status('deleted')
            return RemovedFileNodesGenerator(list(deleted_paths), self)
        return []

    def _get_submodule_url(self, submodule_path):
        git_modules_path = '.gitmodules'

        if self._submodules is None:
            self._submodules = {}

            try:
                submodules_node = self.get_node(git_modules_path)
            except NodeDoesNotExistError:
                return None

            content = submodules_node.content

            # ConfigParser fails if there are whitespaces
            content = '\n'.join(l.strip() for l in content.split('\n'))

            parser = configparser.ConfigParser()
            parser.readfp(StringIO(content))

            for section in parser.sections():
                path = parser.get(section, 'path')
                url = parser.get(section, 'url')
                if path and url:
                    self._submodules[path.strip('/')] = url

        return self._submodules.get(submodule_path.strip('/'))