# HG changeset patch
# User Daniel Dourvaris
# Date 2016-10-19 07:37:07
# Node ID 8ba7d01618d4bc7169a72cb087c8bbdce58dffb7
# Parent  56031659137ba81341b737d79e08408e9160e6aa
codeblocks: add new code token rendering function that supports diff and normal tokens.

diff --git a/rhodecode/lib/codeblocks.py b/rhodecode/lib/codeblocks.py
--- a/rhodecode/lib/codeblocks.py
+++ b/rhodecode/lib/codeblocks.py
@@ -18,16 +18,33 @@
 # RhodeCode Enterprise Edition, including its added features, Support services,
 # and proprietary license terms, please see https://rhodecode.com/licenses/
 
-
+import logging
 from itertools import groupby
 
 from pygments import lex
-# PYGMENTS_TOKEN_TYPES is used in a hot loop keep attribute lookups to a minimum
-from pygments.token import STANDARD_TYPES as PYGMENTS_TOKEN_TYPES
+from pygments.formatters.html import _get_ttype_class as pygment_token_class
+from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
+from rhodecode.lib.utils2 import AttributeDict
+from rhodecode.lib.vcs.nodes import FileNode
+from pygments.lexers import get_lexer_by_name
+
+plain_text_lexer = get_lexer_by_name(
+    'text', stripall=False, stripnl=False, ensurenl=False)
+
+
+log = logging.getLogger(__name__)
 
-from rhodecode.lib.helpers import get_lexer_for_filenode
 
-def tokenize_file(content, lexer):
+def filenode_as_lines_tokens(filenode, lexer=None):
+    lexer = lexer or get_lexer_for_filenode(filenode)
+    log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
+    # use the resolved lexer instead of a second get_lexer_for_filenode() lookup
+    tokens = tokenize_string(filenode.content, lexer)
+    lines = split_token_stream(tokens, split_string='\n')
+    return list(lines)
+
+
+def tokenize_string(content, lexer):
     """
     Use pygments to tokenize some content based on a lexer
     ensuring all original new lines and whitespace are preserved
     """
 
@@ -36,65 +53,33 @@ def tokenize_file(content, lexer):
     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
-    return lex(content, lexer)
+    for token_type, token_text in lex(content, lexer):
+        yield pygment_token_class(token_type), token_text
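+
+# Example (illustrative sketch, not from the original change): tokenize_string
+# yields (css_class, text) pairs, e.g. with the module-level plain_text_lexer:
+#
+#   list(tokenize_string(u'a\nb', plain_text_lexer))
+#   => [('', u'a\nb')]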
 
 
-def pygment_token_class(token_type):
-    """ Convert a pygments token type to html class name """
-
-    fname = PYGMENTS_TOKEN_TYPES.get(token_type)
-    if fname:
-        return fname
-
-    aname = ''
-    while fname is None:
-        aname = '-' + token_type[-1] + aname
-        token_type = token_type.parent
-        fname = PYGMENTS_TOKEN_TYPES.get(token_type)
-
-    return fname + aname
-
-
-def tokens_as_lines(tokens, split_string=u'\n'):
+def split_token_stream(tokens, split_string=u'\n'):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
-    eg. [(TEXT, 'some\ntext')] => [(TEXT, 'some'), (TEXT, 'text')]
+    >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
+    [[(TEXT, 'some')],
+     [(TEXT, 'text'), (TEXT, 'more')],
+     [(TEXT, '')]]
     """
 
     buffer = []
-    for token_type, token_text in tokens:
+    for token_class, token_text in tokens:
         parts = token_text.split(split_string)
         for part in parts[:-1]:
-            buffer.append((token_type, part))
+            buffer.append((token_class, part))
             yield buffer
             buffer = []
 
-        buffer.append((token_type, parts[-1]))
+        buffer.append((token_class, parts[-1]))
 
     if buffer:
         yield buffer
 
 
-def filenode_as_lines_tokens(filenode):
-    """
-    Return a generator of lines with pygment tokens for a filenode eg:
-
-    [
-        (1, line1_tokens_list),
-        (2, line2_tokens_list),
-    ]
-    """
-
-    return enumerate(
-        tokens_as_lines(
-            tokenize_file(
-                filenode.content, get_lexer_for_filenode(filenode)
-            )
-        ),
-        1)
-
-
 def filenode_as_annotated_lines_tokens(filenode):
     """
     Take a file node and return a list of annotations => lines, if no annotation
@@ -120,9 +105,8 @@ def filenode_as_annotated_lines_tokens(f
     ]
     """
+    commit_cache = {}  # cache commit_getter lookups
 
-    # cache commit_getter lookups
-    commit_cache = {}
     def _get_annotation(commit_id, commit_getter):
         if commit_id not in commit_cache:
             commit_cache[commit_id] = commit_getter()
@@ -136,7 +120,7 @@ def filenode_as_annotated_lines_tokens(f
     annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                          for line_no, tokens
-                         in filenode_as_lines_tokens(filenode))
+                         in enumerate(filenode_as_lines_tokens(filenode), 1))
 
     grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
 
@@ -145,3 +129,86 @@ def filenode_as_annotated_lines_tokens(f
         annotation, [(line_no, tokens)
                      for (_, line_no, tokens) in group]
     )
+
+
+def render_tokenstream(tokenstream):
+    result = []
+    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
+
+        if token_class:
+            result.append(u'<span class="%s">' % token_class)
+        else:
+            result.append(u'<span>')
+
+        for op_tag, token_text in token_ops_texts:
+
+            if op_tag:
+                result.append(u'<%s>' % op_tag)
+
+            escaped_text = html_escape(token_text)
+            escaped_text = escaped_text.replace('\n', '<nl>\n')
+
+            result.append(escaped_text)
+
+            if op_tag:
+                result.append(u'</%s>' % op_tag)
+
+        result.append(u'</span>')
+
+    html = ''.join(result)
+    return html
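+
+# Example (illustrative sketch, not from the original change):
+# render_tokenstream accepts (class, text) 2-tuples or (class, op, text)
+# 3-tuples and emits one <span> per rolled-up class group:
+#
+#   render_tokenstream([('k', u'def'), ('', u' foo')])
+#   => u'<span class="k">def</span><span> foo</span>'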
+
+
+def rollup_tokenstream(tokenstream):
+    """
+    Group a token stream of the format:
+
+        ('class', 'op', 'text')
+    or
+        ('class', 'text')
+
+    into
+
+        [('class1',
+            [('op1', 'text'),
+             ('op2', 'text')]),
+         ('class2',
+            [('op3', 'text')])]
+
+    This is used to get the minimal tags necessary when
+    rendering to html, e.g. for a token stream:
+
+        <span class="A"><ins>he</ins>llo</span>
+    vs
+        <span class="A"><ins>h</ins></span><span class="A"><ins>e</ins>llo</span>
+
+    If a 2-tuple is passed in, the output op will be an empty string.
+
+    >>> rollup_tokenstream([('classA', '', 'h'),
+    ...                     ('classA', 'del', 'ell'),
+    ...                     ('classA', '', 'o'),
+    ...                     ('classB', '', ' '),
+    ...                     ('classA', '', 'the'),
+    ...                     ('classA', '', 're')])
+    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
+     ('classB', [('', ' ')]),
+     ('classA', [('', 'there')])]
+    """
+    if tokenstream and len(tokenstream[0]) == 2:
+        tokenstream = ((t[0], '', t[1]) for t in tokenstream)
+
+    result = []
+    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
+        ops = []
+        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
+            text_buffer = []
+            for t_class, t_op, t_text in token_text_list:
+                text_buffer.append(t_text)
+            ops.append((token_op, ''.join(text_buffer)))
+        result.append((token_class, ops))
+    return result
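+
+# Example (illustrative sketch, not from the original change):
+# rollup_tokenstream groups adjacent tokens first by class, then by op,
+# concatenating their text:
+#
+#   rollup_tokenstream([('A', 'ins', 'h'), ('A', 'ins', 'e'), ('A', '', 'y')])
+#   => [('A', [('ins', 'he'), ('', 'y')])]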
diff --git a/rhodecode/public/css/code-block.less b/rhodecode/public/css/code-block.less
--- a/rhodecode/public/css/code-block.less
+++ b/rhodecode/public/css/code-block.less
@@ -644,6 +644,9 @@ pre.literal-block, .codehilite pre{
 
 /* START NEW CODE BLOCK CSS */
 
+@cb-line-height: 18px;
+@cb-line-code-padding: 10px;
+
 table.cb {
   width: 100%;
   border-collapse: collapse;
@@ -678,21 +681,23 @@ table.cb {
 
   td {
     vertical-align: top;
-    padding: 2px 10px;
+    padding: 0;
 
     &.cb-content {
-      white-space: pre-wrap;
-      font-family: @font-family-monospace;
      font-size: 12.35px;
 
-      span {
+      span.cb-code {
+        line-height: @cb-line-height;
+        padding-left: @cb-line-code-padding;
+        display: block;
+        white-space: pre-wrap;
+        font-family: @font-family-monospace;
        word-break: break-word;
       }
     }
 
     &.cb-lineno {
       padding: 0;
-      height: 1px; /* this allows the link to fill to 100% height of the td */
       width: 50px;
       color: rgba(0, 0, 0, 0.3);
       text-align: right;
@@ -702,21 +707,20 @@ table.cb {
       a::before {
         content: attr(data-line-no);
       }
-      &.cb-line-selected {
+      &.cb-line-selected a {
         background: @comment-highlight-color !important;
       }
 
       a {
         display: block;
-        height: 100%;
+        padding-right: @cb-line-code-padding;
+        line-height: @cb-line-height;
         color: rgba(0, 0, 0, 0.3);
-        padding: 0 10px; /* vertical padding is 0 so that height: 100% works */
-        line-height: 18px; /* use this instead of vertical padding */
       }
     }
 
     &.cb-content {
-      &.cb-line-selected {
+      &.cb-line-selected .cb-code {
         background: @comment-highlight-color !important;
       }
     }

diff --git a/rhodecode/templates/codeblocks/source.html b/rhodecode/templates/codeblocks/source.html
--- a/rhodecode/templates/codeblocks/source.html
+++ b/rhodecode/templates/codeblocks/source.html
@@ -2,9 +2,9 @@
     annotation=None,
     bgcolor=None)">
   <%
-  # avoid module lookups for performance
-  from rhodecode.lib.codeblocks import pygment_token_class
-  from rhodecode.lib.helpers import html_escape
+  from rhodecode.lib.codeblocks import render_tokenstream
+  # avoid module lookup for performance
+  html_escape = h.html_escape
   %>
   <td class="cb-content"
     %if bgcolor:
      style="background: ${bgcolor}"
     %endif
+    >
-    ${
-      ''.join(
-        '<span class="%s">%s</span>' %
-        (pygment_token_class(token_type), html_escape(token_text))
-        for token_type, token_text in tokens) + '\n' | n
-    }
-    ## this ugly list comp is necessary for performance
+    ## newline at end is necessary for highlight to work when line is empty
+    ## and for copy pasting code to work as expected
+    <span class="cb-code">${render_tokenstream(tokens)|n}${'\n'}</span>
   </td>

diff --git a/rhodecode/templates/files/files_source.html b/rhodecode/templates/files/files_source.html
--- a/rhodecode/templates/files/files_source.html
+++ b/rhodecode/templates/files/files_source.html
@@ -62,7 +62,7 @@
         ${sourceblock.render_annotation_lines(annotation, lines, color_hasher)}
       %endfor
     %else:
-      %for line_num, tokens in c.lines:
+      %for line_num, tokens in enumerate(c.lines, 1):
         ${sourceblock.render_line(line_num, tokens)}
       %endfor
     %endif

diff --git a/rhodecode/tests/lib/test_codeblocks.py b/rhodecode/tests/lib/test_codeblocks.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/tests/lib/test_codeblocks.py
@@ -0,0 +1,330 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2016-2016 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import pytest
+
+from rhodecode.lib.codeblocks import (
+    tokenize_string, split_token_stream, rollup_tokenstream,
+    render_tokenstream)
+from pygments.lexers import get_lexer_by_name
+
+
+class TestTokenizeString(object):
+
+    python_code = '''
+    import this
+
+    var = 6
+    print "this"
+
+    '''
+
+    def test_tokenize_as_python(self):
+        lexer = get_lexer_by_name('python')
+        tokens = list(tokenize_string(self.python_code, lexer))
+
+        assert tokens == [
+            ('', u'\n'),
+            ('', u'    '),
+            ('kn', u'import'),
+            ('', u' '),
+            ('nn', u'this'),
+            ('', u'\n'),
+            ('', u'\n'),
+            ('', u'    '),
+            ('n', u'var'),
+            ('', u' '),
+            ('o', u'='),
+            ('', u' '),
+            ('mi', u'6'),
+            ('', u'\n'),
+            ('', u'    '),
+            ('k', u'print'),
+            ('', u' '),
+            ('s2', u'"'),
+            ('s2', u'this'),
+            ('s2', u'"'),
+            ('', u'\n'),
+            ('', u'\n'),
+            ('', u'    ')
+        ]
+
+    def test_tokenize_as_text(self):
+        lexer = get_lexer_by_name('text')
+        tokens = list(tokenize_string(self.python_code, lexer))
+
+        assert tokens == [
+            ('',
+             u'\n    import this\n\n    var = 6\n    print "this"\n\n    ')
+        ]
+
+
+class TestSplitTokenStream(object):
+
+    def test_split_token_stream(self):
+        lines = list(split_token_stream(
+            [('type1', 'some\ntext'), ('type2', 'more\n')]))
+
+        assert lines == [
+            [('type1', u'some')],
+            [('type1', u'text'), ('type2', u'more')],
+            [('type2', u'')],
+        ]
+
+    def test_split_token_stream_other_char(self):
+        lines = list(split_token_stream(
+            [('type1', 'some\ntext'), ('type2', 'more\n')],
+            split_string='m'))
+
+        assert lines == [
+            [('type1', 'so')],
+            [('type1', 'e\ntext'), ('type2', '')],
+            [('type2', 'ore\n')],
+        ]
+
+    def test_split_token_stream_without_char(self):
+        lines = list(split_token_stream(
+            [('type1', 'some\ntext'), ('type2', 'more\n')],
+            split_string='z'))
+
+        assert lines == [
+            [('type1', 'some\ntext'), ('type2', 'more\n')]
+        ]
+
+    def test_split_token_stream_single(self):
+        lines = list(split_token_stream(
+            [('type1', '\n')], split_string='\n'))
+
+        assert lines == [
+            [('type1', '')],
+            [('type1', '')],
+        ]
+
+    def test_split_token_stream_single_repeat(self):
+        lines = list(split_token_stream(
+            [('type1', '\n\n\n')], split_string='\n'))
+
+        assert lines == [
+            [('type1', '')],
+            [('type1', '')],
+            [('type1', '')],
+            [('type1', '')],
+        ]
+
+    def test_split_token_stream_multiple_repeat(self):
+        lines = list(split_token_stream(
+            [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
+
+        assert lines == [
+            [('type1', '')],
+            [('type1', '')],
+            [('type1', ''), ('type2', '')],
+            [('type2', '')],
+            [('type2', '')],
+        ]
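+
+    def test_split_token_stream_multichar_split_string(self):
+        # illustrative extra case (editorial sketch, not in the original
+        # change): split_string may be longer than one character, following
+        # str.split semantics
+        lines = list(split_token_stream(
+            [('type1', 'abxxcd'), ('type2', 'efxxgh')], split_string='xx'))
+
+        assert lines == [
+            [('type1', 'ab')],
+            [('type1', 'cd'), ('type2', 'ef')],
+            [('type2', 'gh')],
+        ]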
+
+
+class TestRollupTokens(object):
+
+    @pytest.mark.parametrize('tokenstream,output', [
+        ([],
+            []),
+        ([('A', 'hell'), ('A', 'o')], [
+            ('A', [
+                ('', 'hello')]),
+        ]),
+        ([('A', 'hell'), ('B', 'o')], [
+            ('A', [
+                ('', 'hell')]),
+            ('B', [
+                ('', 'o')]),
+        ]),
+        ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
+            ('A', [
+                ('', 'hello')]),
+            ('B', [
+                ('', ' ')]),
+            ('A', [
+                ('', 'there')]),
+        ]),
+    ])
+    def test_rollup_tokenstream_without_ops(self, tokenstream, output):
+        assert list(rollup_tokenstream(tokenstream)) == output
+
+    @pytest.mark.parametrize('tokenstream,output', [
+        ([],
+            []),
+        ([('A', '', 'hell'), ('A', '', 'o')], [
+            ('A', [
+                ('', 'hello')]),
+        ]),
+        ([('A', '', 'hell'), ('B', '', 'o')], [
+            ('A', [
+                ('', 'hell')]),
+            ('B', [
+                ('', 'o')]),
+        ]),
+        ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
+            ('A', [
+                ('', 'h')]),
+            ('B', [
+                ('', 'e')]),
+            ('C', [
+                ('', 'y')]),
+        ]),
+        ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
+            ('A', [
+                ('', 'he')]),
+            ('C', [
+                ('', 'y')]),
+        ]),
+        ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
+            ('A', [
+                ('ins', 'he')
+            ]),
+        ]),
+        ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
+            ('A', [
+                ('ins', 'h'),
+                ('del', 'e')
+            ]),
+        ]),
+        ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
+            ('A', [
+                ('ins', 'h'),
+            ]),
+            ('B', [
+                ('del', 'ey'),
+            ]),
+        ]),
+        ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
+            ('A', [
+                ('ins', 'h'),
+                ('del', 'e'),
+            ]),
+            ('B', [
+                ('del', 'y'),
+            ]),
+        ]),
+        ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
+            ('A', [
+                ('', 'some'),
+                ('ins', 'new'),
+                ('', 'name'),
+            ]),
+        ]),
+    ])
+    def test_rollup_tokenstream_with_ops(self, tokenstream, output):
+        assert list(rollup_tokenstream(tokenstream)) == output
+
+
+class TestRenderTokenStream(object):
+
+    @pytest.mark.parametrize('tokenstream,output', [
+        (
+            [],
+            '',
+        ),
+        (
+            [('', '', u'')],
+            '<span></span>',
+        ),
+        (
+            [('', '', u'text')],
+            '<span>text</span>',
+        ),
+        (
+            [('A', '', u'')],
+            '<span class="A"></span>',
+        ),
+        (
+            [('A', '', u'hello')],
+            '<span class="A">hello</span>',
+        ),
+        (
+            [('A', '', u'hel'), ('A', '', u'lo')],
+            '<span class="A">hello</span>',
+        ),
+        (
+            [('A', '', u'two\n'), ('A', '', u'lines')],
+            '<span class="A">two<nl>\nlines</span>',
+        ),
+        (
+            [('A', '', u'\nthree\n'), ('A', '', u'lines')],
+            '<span class="A"><nl>\nthree<nl>\nlines</span>',
+        ),
+        (
+            [('', '', u'\n'), ('A', '', u'line')],
+            '<span><nl>\n</span><span class="A">line</span>',
+        ),
+        (
+            [('', 'ins', u'\n'), ('A', '', u'line')],
+            '<span><ins><nl>\n</ins></span><span class="A">line</span>',
+        ),
+        (
+            [('A', '', u'hel'), ('A', 'ins', u'lo')],
+            '<span class="A">hel<ins>lo</ins></span>',
+        ),
+        (
+            [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
+            '<span class="A">hel<ins>lo</ins></span>',
+        ),
+        (
+            [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
+            '<span class="A">hel<ins>l</ins><del>o</del></span>',
+        ),
+        (
+            [('A', '', u'hel'), ('B', '', u'lo')],
+            '<span class="A">hel</span><span class="B">lo</span>',
+        ),
+        (
+            [('A', '', u'hel'), ('B', 'ins', u'lo')],
+            '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
+        ),
+    ])
+    def test_render_tokenstream_with_ops(self, tokenstream, output):
+        html = render_tokenstream(tokenstream)
+        assert html == output
+
+    @pytest.mark.parametrize('tokenstream,output', [
+        (
+            [('A', u'hel'), ('A', u'lo')],
+            '<span class="A">hello</span>',
+        ),
+        (
+            [('A', u'hel'), ('A', u'l'), ('A', u'o')],
+            '<span class="A">hello</span>',
+        ),
+        (
+            [('A', u'hel'), ('B', u'lo')],
+            '<span class="A">hel</span><span class="B">lo</span>',
+        ),
+    ])
+    def test_render_tokenstream_without_ops(self, tokenstream, output):
+        html = render_tokenstream(tokenstream)
+        assert html == output
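+
+
+# Example (illustrative sketch, not from the original change): the three
+# helpers compose as tokenize -> split -> render:
+#
+#   tokens = tokenize_string(u'a\nb', get_lexer_by_name('text'))
+#   lines = list(split_token_stream(tokens))
+#   [render_tokenstream(line) for line in lines]
+#   => [u'<span>a</span>', u'<span>b</span>']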