# -*- coding: utf-8 -*-

# Copyright (C) 2011-2016 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

from itertools import groupby

from pygments import lex
# PYGMENTS_TOKEN_TYPES is used in a hot loop; keep attribute lookups to a minimum
from pygments.token import STANDARD_TYPES as PYGMENTS_TOKEN_TYPES

from rhodecode.lib.helpers import get_lexer_for_filenode


def tokenize_file(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer,
    ensuring all original newlines and whitespace are preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False
    return lex(content, lexer)

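
# Illustrative usage of tokenize_file (a sketch, not part of the original
# module): because stripping is disabled above, the concatenated token text
# should round-trip to the original content.
#
#     from pygments.lexers import PythonLexer
#     source = u'def foo():\n    pass\n'
#     tokens = list(tokenize_file(source, PythonLexer()))
#     assert u''.join(text for _, text in tokens) == source

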
def pygment_token_class(token_type):
    """ Convert a pygments token type to an HTML class name """

    fname = PYGMENTS_TOKEN_TYPES.get(token_type)
    if fname:
        return fname

    aname = ''
    while fname is None:
        aname = '-' + token_type[-1] + aname
        token_type = token_type.parent
        fname = PYGMENTS_TOKEN_TYPES.get(token_type)

    return fname + aname

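
# Illustrative behaviour (a sketch, not part of the original module):
# STANDARD_TYPES maps known token types to short class names (e.g.
# Token.Name.Function -> 'nf'); for unknown subtypes the loop walks up the
# token hierarchy and appends each missing tail segment.
#
#     from pygments.token import Token
#     pygment_token_class(Token.Name.Function)         # 'nf'
#     pygment_token_class(Token.Name.Function.Custom)  # 'nf-Custom' (hypothetical subtype)

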
def tokens_as_lines(tokens, split_string=u'\n'):
    """
    Take a list of (TokenType, text) tuples and split them by a string,
    yielding one list of tuples per resulting line

    eg. [(TEXT, 'some\ntext')] => [[(TEXT, 'some')], [(TEXT, 'text')]]
    """

    buffer = []
    for token_type, token_text in tokens:
        parts = token_text.split(split_string)
        for part in parts[:-1]:
            buffer.append((token_type, part))
            yield buffer
            buffer = []

        buffer.append((token_type, parts[-1]))

    if buffer:
        yield buffer

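
# Illustrative behaviour (a sketch, not part of the original module): each
# yielded item is one line's worth of (token_type, text) tuples.
#
#     from pygments.token import Text
#     list(tokens_as_lines([(Text, u'some\ntext')]))
#     # => [[(Text, u'some')], [(Text, u'text')]]

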
def filenode_as_lines_tokens(filenode):
    """
    Return a generator of lines with pygments tokens for a filenode eg:

    [
        (1, line1_tokens_list),
        (2, line2_tokens_list),
    ]
    """

    return enumerate(
        tokens_as_lines(
            tokenize_file(
                filenode.content, get_lexer_for_filenode(filenode)
            )
        ),
        1)

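
# Illustrative usage (a sketch, not part of the original module); `filenode`
# is assumed to be a vcs FileNode whose content and path drive lexer selection:
#
#     for line_no, line_tokens in filenode_as_lines_tokens(filenode):
#         css_classes = [pygment_token_class(t) for t, _ in line_tokens]

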
def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotation => lines pairs; if no
    annotation is found for a line, the annotation will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    # cache commit_getter lookups
    commit_cache = {}

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in filenode_as_lines_tokens(filenode))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )
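
# Illustrative usage (a sketch, not part of the original module): consecutive
# lines attributed to the same commit come back as one group, so a template
# can render a single annotation cell spanning them.
#
#     for annotation, lines in filenode_as_annotated_lines_tokens(filenode):
#         # `annotation` is the cached commit object, or None
#         for line_no, tokens in lines:
#             ...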