# HG changeset patch
# User Marcin Lulek
# Date 2019-01-31 12:52:30
# Node ID e5ce09620d82e8dbc5a2e5397cf34e9a4fea5d62
# Parent  6236d00063d4a55b706e3cbec9ec3fcbe03d2863
diffs: fixed case of bogus files diff rendering

- adds a safe placeholder so we never crash anymore in these cases, as it's the 2nd time this happens
- fixes #5528
- references #5422

diff --git a/rhodecode/lib/codeblocks.py b/rhodecode/lib/codeblocks.py
--- a/rhodecode/lib/codeblocks.py
+++ b/rhodecode/lib/codeblocks.py
@@ -49,8 +49,9 @@ def filenode_as_lines_tokens(filenode, l
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
-    tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens)
+    content = filenode.content
+    tokens = tokenize_string(content, lexer)
+    lines = split_token_stream(tokens, content)
     rv = list(lines)
     return rv
 
@@ -74,7 +75,7 @@ def tokenize_string(content, lexer):
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens):
+def split_token_stream(tokens, content):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
@@ -83,18 +84,23 @@
         (TEXT, 'more'),
         (TEXT, 'text')]
     """
-    buffer = []
+    token_buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split('\n')
         for part in parts[:-1]:
-            buffer.append((token_class, part))
-            yield buffer
-            buffer = []
+            token_buffer.append((token_class, part))
+            yield token_buffer
+            token_buffer = []
+
+        token_buffer.append((token_class, parts[-1]))
 
-        buffer.append((token_class, parts[-1]))
-
-    if buffer:
-        yield buffer
+    if token_buffer:
+        yield token_buffer
+    elif content:
+        # this is a special case: we have the content, but tokenization didn't produce
+        # any results. This can happen if known file extensions like .css have some bogus
+        # unicode content without any newline characters
+        yield [(pygment_token_class(Token.Text), content)]
 
 
 def filenode_as_annotated_lines_tokens(filenode):
@@ -721,7 +727,11 @@ class DiffSet(object):
         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
-        return self.highlighted_filenodes[filenode][line_number - 1]
+
+        try:
+            return self.highlighted_filenodes[filenode][line_number - 1]
+        except Exception:
+            return [('', u'rhodecode diff rendering error')]
 
     def action_to_op(self, action):
         return {
diff --git a/rhodecode/tests/lib/test_codeblocks.py b/rhodecode/tests/lib/test_codeblocks.py
--- a/rhodecode/tests/lib/test_codeblocks.py
+++ b/rhodecode/tests/lib/test_codeblocks.py
@@ -81,8 +81,9 @@ class TestTokenizeString(object):
 class TestSplitTokenStream(object):
 
     def test_split_token_stream(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')]))
+        tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', u'some')],
@@ -91,18 +92,18 @@
         ]
 
     def test_split_token_stream_single(self):
-        lines = list(split_token_stream(
-            [('type1', '\n')]))
-
+        tokens = [('type1', '\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
         ]
 
     def test_split_token_stream_single_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n\n')]))
-
+        tokens = [('type1', '\n\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -111,9 +112,10 @@
         ]
 
     def test_split_token_stream_multiple_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')]))
+        tokens = [('type1', '\n\n'), ('type2', '\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -122,6 +124,27 @@
             [('type2', '')],
         ]
 
+    def test_no_tokens_by_content(self):
+        tokens = []
+        content = u'\ufeff'
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', content)],
+        ]
+
+    def test_no_tokens_by_valid_content(self):
+        from pygments.lexers.css import CssLexer
+        content = u'\ufeff table.dataTable'
+        tokens = tokenize_string(content, CssLexer())
+
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', u' '),
+             ('nt', u'table'),
+             ('p', u'.'),
+             ('nc', u'dataTable')],
+        ]
+
 
 class TestRollupTokens(object):
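
A quick illustration of the new fallback (a minimal sketch, assuming a RhodeCode checkout is importable; it mirrors the added test_no_tokens_by_content case): when Pygments produces no tokens for non-empty content, split_token_stream now yields the raw content as a single plain-text line instead of yielding no lines at all, which is the empty result the new try/except in DiffSet also guards against.

    from rhodecode.lib.codeblocks import split_token_stream

    # BOM-only content for which a lexer emits no tokens at all
    content = u'\ufeff'
    lines = list(split_token_stream([], content))

    # the safe placeholder: one line carrying the original content under the
    # plain-text token class, so the diff renderer always has something to show
    assert lines == [[('', content)]]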