# -*- coding: utf-8 -*-

# Copyright (C) 2011-2016 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

from itertools import groupby

from pygments import lex
# PYGMENTS_TOKEN_TYPES is used in a hot loop; keep attribute lookups to a minimum
from pygments.token import STANDARD_TYPES as PYGMENTS_TOKEN_TYPES

from rhodecode.lib.helpers import get_lexer_for_filenode


def tokenize_file(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer, ensuring all
    original new lines and whitespace are preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False
    return lex(content, lexer)


def pygment_token_class(token_type):
    """
    Convert a pygments token type to an html class name
    """

    fname = PYGMENTS_TOKEN_TYPES.get(token_type)
    if fname:
        return fname

    # no standard class for this exact type: walk up the token hierarchy
    # until one is found, recording the skipped subtype names as suffixes
    aname = ''
    while fname is None:
        aname = '-' + token_type[-1] + aname
        token_type = token_type.parent
        fname = PYGMENTS_TOKEN_TYPES.get(token_type)

    return fname + aname


def tokens_as_lines(tokens, split_string=u'\n'):
    """
    Take a stream of (TokenType, text) tuples and regroup them by line,
    splitting token text on ``split_string``

    eg. [(TEXT, 'some\ntext')] => [(TEXT, 'some'), (TEXT, 'text')]
    """

    buffer = []
    for token_type, token_text in tokens:
        parts = token_text.split(split_string)
        # every part except the last one closes a line
        for part in parts[:-1]:
            buffer.append((token_type, part))
            yield buffer
            buffer = []

        buffer.append((token_type, parts[-1]))

    if buffer:
        yield buffer


def filenode_as_lines_tokens(filenode):
    """
    Return a generator of numbered lines with pygments tokens for a filenode

    eg:

    [
        (1, line1_tokens_list),
        (2, line2_tokens_list),
    ]
    """

    return enumerate(
        tokens_as_lines(
            tokenize_file(
                filenode.content, get_lexer_for_filenode(filenode))),
        1)


def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a generator of (annotation, lines) pairs; if
    no annotation is found for a line, the annotation will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line3_tokens_list),
        ]),
        (None, [
            (4, line4_tokens_list),
        ]),
        (annotation1, [
            (5, line5_tokens_list),
            (6, line6_tokens_list),
        ])
    ]
    """

    # cache commit_getter lookups so each commit is resolved only once
    commit_cache = {}

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in filenode_as_lines_tokens(filenode))

    # consecutive lines that share an annotation collapse into one group
    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )
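

# ---------------------------------------------------------------------------
# Illustrative usage sketches (not part of the original module). These are
# minimal, hedged demos of the helpers above; beyond the module's own imports
# they assume only pygments, and where a real RhodeCode FileNode would be
# required, plain stand-in data is used instead.

# Sketch 1: with stripping disabled, ``tokenize_file`` should round-trip the
# input, i.e. concatenating the token texts reproduces the original content.
# That is the property the ``stripall``/``stripnl``/``ensurenl`` flags
# protect. PythonLexer is used as an arbitrary example lexer.
if __name__ == '__main__':
    from pygments.lexers import PythonLexer

    content = u'def foo():\n    return 1\n'
    tokens = list(tokenize_file(content, PythonLexer()))
    assert u''.join(text for _, text in tokens) == content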
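

# Sketch 2: ``pygment_token_class`` maps a token type to its short css class
# ('nf' for Name.Function). For a subtype with no standard class it walks up
# to the nearest known ancestor, suffixing the skipped subtype names.
# ``Token.Name.Function.MyCustomSubtype`` is a made-up subtype used purely to
# exercise the fallback path (pygments creates token subtypes on attribute
# access, so the name is valid but has no STANDARD_TYPES entry).
if __name__ == '__main__':
    from pygments.token import Token

    assert pygment_token_class(Token.Name.Function) == 'nf'
    assert (pygment_token_class(Token.Name.Function.MyCustomSubtype)
            == 'nf-MyCustomSubtype')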
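

# Sketch 3: ``tokens_as_lines`` regroups a flat token stream into per-line
# lists; note that a trailing newline in the input yields a final line
# holding a single empty-text token.
if __name__ == '__main__':
    from pygments.token import Token

    stream = [(Token.Text, u'some\ntext'), (Token.Comment, u' # done\n')]
    lines = list(tokens_as_lines(stream))
    assert lines == [
        [(Token.Text, u'some')],
        [(Token.Text, u'text'), (Token.Comment, u' # done')],
        [(Token.Comment, u'')],
    ]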
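

# Sketch 4: the grouping step of ``filenode_as_annotated_lines_tokens``,
# shown with plain stand-in data instead of a real FileNode. Consecutive
# lines sharing an annotation (hypothetical commit ids 'c1'/'c2' here, with
# None for unannotated lines) collapse into one (annotation, lines) pair,
# exactly as itertools.groupby groups runs of equal keys.
if __name__ == '__main__':
    annotated = [
        ('c1', 1, ['tokens1']),
        ('c1', 2, ['tokens2']),
        ('c2', 3, ['tokens3']),
        (None, 4, ['tokens4']),
    ]
    grouped = [
        (annotation, [(line_no, tokens) for _, line_no, tokens in group])
        for annotation, group in groupby(annotated, lambda x: x[0])
    ]
    assert grouped == [
        ('c1', [(1, ['tokens1']), (2, ['tokens2'])]),
        ('c2', [(3, ['tokens3'])]),
        (None, [(4, ['tokens4'])]),
    ]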