# HG changeset patch
# User Marcin Lulek
# Date 2019-01-31 12:52:30
# Node ID e5ce09620d82e8dbc5a2e5397cf34e9a4fea5d62
# Parent  6236d00063d4a55b706e3cbec9ec3fcbe03d2863
diffs: fixed case of bogus files diff rendering

- adds a safe placeholder so we never crash anymore in these cases, as it's the 2nd time this happens
- fixes #5528
- references #5422

diff --git a/rhodecode/lib/codeblocks.py b/rhodecode/lib/codeblocks.py
--- a/rhodecode/lib/codeblocks.py
+++ b/rhodecode/lib/codeblocks.py
@@ -49,8 +49,9 @@ def filenode_as_lines_tokens(filenode, l
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
-    tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens)
+    content = filenode.content
+    tokens = tokenize_string(content, lexer)
+    lines = split_token_stream(tokens, content)
     rv = list(lines)
     return rv
 
@@ -74,7 +75,7 @@ def tokenize_string(content, lexer):
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens):
+def split_token_stream(tokens, content):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
@@ -83,18 +84,23 @@
         (TEXT, 'more'),
         (TEXT, 'text')]
     """
-    buffer = []
+    token_buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split('\n')
         for part in parts[:-1]:
-            buffer.append((token_class, part))
-            yield buffer
-            buffer = []
+            token_buffer.append((token_class, part))
+            yield token_buffer
+            token_buffer = []
+
+        token_buffer.append((token_class, parts[-1]))
 
-        buffer.append((token_class, parts[-1]))
-
-    if buffer:
-        yield buffer
+    if token_buffer:
+        yield token_buffer
+    elif content:
+        # this is a special case: we have the content, but tokenization didn't produce
+        # any results. This can happen if known file extensions like .css have some bogus
+        # unicode content without any newline characters
+        yield [(pygment_token_class(Token.Text), content)]
 
 
 def filenode_as_annotated_lines_tokens(filenode):
@@ -721,7 +727,11 @@ class DiffSet(object):
         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
-        return self.highlighted_filenodes[filenode][line_number - 1]
+
+        try:
+            return self.highlighted_filenodes[filenode][line_number - 1]
+        except Exception:
+            return [('', u'rhodecode diff rendering error')]
 
     def action_to_op(self, action):
         return {
diff --git a/rhodecode/tests/lib/test_codeblocks.py b/rhodecode/tests/lib/test_codeblocks.py
--- a/rhodecode/tests/lib/test_codeblocks.py
+++ b/rhodecode/tests/lib/test_codeblocks.py
@@ -81,8 +81,9 @@ class TestTokenizeString(object):
 class TestSplitTokenStream(object):
 
     def test_split_token_stream(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')]))
+        tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', u'some')],
@@ -91,18 +92,18 @@
         ]
 
     def test_split_token_stream_single(self):
-        lines = list(split_token_stream(
-            [('type1', '\n')]))
-
+        tokens = [('type1', '\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
         ]
 
     def test_split_token_stream_single_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n\n')]))
-
+        tokens = [('type1', '\n\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -111,9 +112,10 @@
         ]
 
     def test_split_token_stream_multiple_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')]))
+        tokens = [('type1', '\n\n'), ('type2', '\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -122,6 +124,27 @@
             [('type2', '')],
         ]
 
+    def test_no_tokens_by_content(self):
+        tokens = []
+        content = u'\ufeff'
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', content)],
+        ]
+
+    def test_no_tokens_by_valid_content(self):
+        from pygments.lexers.css import CssLexer
+        content = u'\ufeff table.dataTable'
+        tokens = tokenize_string(content, CssLexer())
+
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', u' '),
+             ('nt', u'table'),
+             ('p', u'.'),
+             ('nc', u'dataTable')],
+        ]
+
 
 class TestRollupTokens(object):
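
A quick illustration of the new fallback (a minimal sketch, assuming a RhodeCode checkout is importable; it mirrors the added test_no_tokens_by_content case): when Pygments produces no tokens for non-empty content, split_token_stream now yields the raw content as a single plain-text line instead of yielding no lines at all, which is the empty result the new try/except in DiffSet also guards against.

    from rhodecode.lib.codeblocks import split_token_stream

    # BOM-only content for which a lexer emits no tokens at all
    content = u'\ufeff'
    lines = list(split_token_stream([], content))

    # the safe placeholder: one line carrying the original content under the
    # plain-text token class, so the diff renderer always has something to show
    assert lines == [[('', content)]]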