# -*- coding: utf-8 -*-

# Copyright (C) 2011-2016 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import logging
from itertools import groupby

from pygments import lex
from pygments.formatters.html import _get_ttype_class as pygment_token_class
from pygments.lexers import get_lexer_by_name

from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
from rhodecode.lib.utils2 import AttributeDict
from rhodecode.lib.vcs.nodes import FileNode


plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)

log = logging.getLogger(__name__)


def filenode_as_lines_tokens(filenode, lexer=None):
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
    tokens = tokenize_string(filenode.content, lexer)
    lines = split_token_stream(tokens, split_string='\n')
    rv = list(lines)
    return rv


def tokenize_string(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer
    ensuring all original new lines and whitespace is preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False
    for token_type, token_text in lex(content, lexer):
        yield pygment_token_class(token_type), token_text


def split_token_stream(tokens, split_string=u'\n'):
    """
    Take a list of (TokenType, text) tuples and split them by a string,
    yielding one list of (TokenType, text) tuples per split (e.g. per line)

    >>> list(split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')]))
    [[(TEXT, 'some')],
     [(TEXT, 'text'), (TEXT, 'more')],
     [(TEXT, '')]]
    """

    buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split(split_string)
        for part in parts[:-1]:
            buffer.append((token_class, part))
            yield buffer
            buffer = []

        buffer.append((token_class, parts[-1]))

    if buffer:
        yield buffer
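
# Illustrative sketch (not part of the upstream module): shows how
# tokenize_string() and split_token_stream() combine to turn raw text into
# per-line lists of (token_class, text) tuples. The helper name and sample
# content are hypothetical; the commented result assumes the plain 'text'
# lexer defined above, whose tokens map to an empty CSS class in Pygments.
def _example_plain_text_lines(content=u'first line\nsecond line\n'):
    tokens = tokenize_string(content, plain_text_lexer)
    # roughly: [[('', u'first line')], [('', u'second line')], [('', u'')]]
    return list(split_token_stream(tokens, split_string=u'\n'))
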
def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotations => lines,
    if no annotation is found, it will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # cache commit_getter lookups

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in enumerate(filenode_as_lines_tokens(filenode), 1))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )


def render_tokenstream(tokenstream):
    result = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

        if token_class:
            result.append(u'<span class="%s">' % token_class)
        else:
            result.append(u'<span>')

        for op_tag, token_text in token_ops_texts:

            if op_tag:
                result.append(u'<%s>' % op_tag)

            escaped_text = html_escape(token_text)
            result.append(escaped_text)

            if op_tag:
                result.append(u'</%s>' % op_tag)

        result.append(u'</span>')

    html = ''.join(result)
    return html


def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1', [('op1', 'text'),
                     ('op2', 'text')]),
         ('class2', [('op3', 'text')])]

    This is used to get the minimal tags necessary when rendering to html,
    eg for a token stream:

        <span class="A"><del>he</del>llo</span>
    vs
        <span class="A"><del>he</del></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])
    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]
    """
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            text_buffer = []
            for t_class, t_op, t_text in token_text_list:
                text_buffer.append(t_text)
            ops.append((token_op, ''.join(text_buffer)))
        result.append((token_class, ops))
    return result
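

# Illustrative sketch (not part of the upstream module): a guarded demo of
# rollup_tokenstream() and render_tokenstream() on a small hand-built token
# stream. The token classes and the 'ins' op below are sample values chosen
# for the example, not output produced by RhodeCode itself.
if __name__ == '__main__':
    _sample = [
        ('k', '', 'def'),
        ('', '', ' '),
        ('nf', 'ins', 'example'),
        ('p', '', '():'),
    ]
    # roughly: [('k', [('', 'def')]), ('', [('', ' ')]),
    #           ('nf', [('ins', 'example')]), ('p', [('', '():')])]
    print(rollup_tokenstream(_sample))
    # roughly (one string, shown wrapped):
    #   <span class="k">def</span><span> </span>
    #   <span class="nf"><ins>example</ins></span><span class="p">():</span>
    print(render_tokenstream(_sample))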