diffs: fixed case of bogus files diff rendering...

Author: ergo
Revision: r3444:e5ce0962 (default branch)
Status: Not Reviewed

The change below teaches split_token_stream() to take the raw file content as a
second argument and, when tokenization yields no tokens at all (e.g. a known
file extension such as .css whose content is bogus unicode without a single
newline), to emit one synthetic line carrying that content. Line lookups during
diff rendering are also wrapped in a guard so that failures degrade to a
placeholder token instead of raising.
@@ -49,8 +49,9 @@
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
-    tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens)
+    content = filenode.content
+    tokens = tokenize_string(content, lexer)
+    lines = split_token_stream(tokens, content)
     rv = list(lines)
     return rv
 
@@ -74,7 +75,7 @@
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens):
+def split_token_stream(tokens, content):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
@@ -83,18 +84,23 @@
      (TEXT, 'more'), (TEXT, 'text')]
     """
 
-    buffer = []
+    token_buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split('\n')
         for part in parts[:-1]:
-            buffer.append((token_class, part))
-            yield buffer
-            buffer = []
+            token_buffer.append((token_class, part))
+            yield token_buffer
+            token_buffer = []
 
-        buffer.append((token_class, parts[-1]))
+        token_buffer.append((token_class, parts[-1]))
 
-    if buffer:
-        yield buffer
+    if token_buffer:
+        yield token_buffer
+    elif content:
+        # this is a special case: we have the content, but tokenization didn't produce
+        # any results. This can happen if known file extensions like .css have some bogus
+        # unicode content without any newline characters
+        yield [(pygment_token_class(Token.Text), content)]
 
 
 def filenode_as_annotated_lines_tokens(filenode):
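
For illustration, here is a minimal, self-contained sketch of the patched
function. It assumes pygment_token_class() can be approximated by a plain
lookup in Pygments' STANDARD_TYPES table (the real helper lives alongside
split_token_stream()); the point is the new elif content: fallback.

    # Sketch only: pygment_token_class() is simplified to a table lookup.
    from pygments.token import Token, STANDARD_TYPES

    def pygment_token_class(token_type):
        # Token.Text maps to the empty CSS class ''.
        return STANDARD_TYPES.get(token_type, '')

    def split_token_stream(tokens, content):
        # Split a stream of (css_class, text) tuples into per-line lists.
        token_buffer = []
        for token_class, token_text in tokens:
            parts = token_text.split('\n')
            for part in parts[:-1]:
                token_buffer.append((token_class, part))
                yield token_buffer
                token_buffer = []
            token_buffer.append((token_class, parts[-1]))

        if token_buffer:
            yield token_buffer
        elif content:
            # No tokens, but the file has content (e.g. a lone BOM with no
            # newline): emit one synthetic line instead of nothing.
            yield [(pygment_token_class(Token.Text), content)]

    # The previously broken case now renders as a single line:
    print(list(split_token_stream([], u'\ufeff')))  # [[('', u'\ufeff')]]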
@@ -721,7 +727,11 @@
         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
-        return self.highlighted_filenodes[filenode][line_number - 1]
+
+        try:
+            return self.highlighted_filenodes[filenode][line_number - 1]
+        except Exception:
+            return [('', u'rhodecode diff rendering error')]
 
     def action_to_op(self, action):
         return {
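
With this guard, an out-of-range line_number (which the bogus-file case could
previously trigger) degrades to a placeholder line instead of aborting the
whole diff page. A hypothetical illustration of that behaviour; the cache dict
and get_line_tokens() below are stand-ins, not the real method:

    # Stand-in cache: filenode -> list of tokenized lines.
    cache = {'example.css': [[('', u'line one')]]}

    def get_line_tokens(filenode, line_number):
        try:
            return cache[filenode][line_number - 1]
        except Exception:
            # Same (css_class, text) shape as a real tokenized line.
            return [('', u'rhodecode diff rendering error')]

    print(get_line_tokens('example.css', 1))    # [('', u'line one')]
    print(get_line_tokens('example.css', 999))  # [('', u'rhodecode diff rendering error')]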
@@ -81,8 +81,9 @@
 class TestSplitTokenStream(object):
 
     def test_split_token_stream(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')]))
+        tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', u'some')],
@@ -91,18 +92,18 @@
         ]
 
     def test_split_token_stream_single(self):
-        lines = list(split_token_stream(
-            [('type1', '\n')]))
-
+        tokens = [('type1', '\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
         ]
 
     def test_split_token_stream_single_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n\n')]))
-
+        tokens = [('type1', '\n\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -111,9 +112,10 @@
         ]
 
     def test_split_token_stream_multiple_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')]))
+        tokens = [('type1', '\n\n'), ('type2', '\n\n')]
+        content = [x + y for x, y in tokens]
 
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -122,6 +124,27 @@
             [('type2', '')],
         ]
 
+    def test_no_tokens_by_content(self):
+        tokens = []
+        content = u'\ufeff'
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', content)],
+        ]
+
+    def test_no_tokens_by_valid_content(self):
+        from pygments.lexers.css import CssLexer
+        content = u'\ufeff table.dataTable'
+        tokens = tokenize_string(content, CssLexer())
+
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', u' '),
+             ('nt', u'table'),
+             ('p', u'.'),
+             ('nc', u'dataTable')],
+        ]
+
 
 class TestRollupTokens(object):
 
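
A note on the updated tests: content = [x + y for x, y in tokens] builds a
list rather than a string, which is enough here because split_token_stream()
only checks content for truthiness, and only falls back to it when the token
stream is empty. test_no_tokens_by_content exercises that fallback directly,
while test_no_tokens_by_valid_content shows the normal path still wins when a
lexer does produce tokens: per the expected output, the BOM is discarded and
the CSS tokenizes as usual.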