@@ -49,8 +49,9 @@ def filenode_as_lines_tokens(filenode, l
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
-    tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens)
+    content = filenode.content
+    tokens = tokenize_string(content, lexer)
+    lines = split_token_stream(tokens, content)
     rv = list(lines)
     return rv
 
@@ -74,7 +75,7 @@ def tokenize_string(content, lexer):
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens):
+def split_token_stream(tokens, content):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
@@ -83,18 +84,23 @@ def split_token_stream(tokens):
      (TEXT, 'more'), (TEXT, 'text')]
     """
 
-    buffer = []
+    token_buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split('\n')
         for part in parts[:-1]:
-            buffer.append((token_class, part))
-            yield buffer
-            buffer = []
+            token_buffer.append((token_class, part))
+            yield token_buffer
+            token_buffer = []
 
-        buffer.append((token_class, parts[-1]))
+        token_buffer.append((token_class, parts[-1]))
 
-    if buffer:
-        yield buffer
+    if token_buffer:
+        yield token_buffer
+    elif content:
+        # this is a special case, we have the content, but tokenization didn't produce
+        # any results. THis can happen if know file extensions like .css have some bogus
+        # unicode content without any newline characters
+        yield [(pygment_token_class(Token.Text), content)]
 
 
 def filenode_as_annotated_lines_tokens(filenode):
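To make the new behaviour easy to see in isolation, here is a minimal, self-contained sketch of the splitting logic this hunk introduces (not the RhodeCode module itself: pygment_token_class(Token.Text) is replaced by the plain '' token class that the new tests expect, so the example runs without pygments):

def split_token_stream(tokens, content):
    # buffer (token_class, text) pairs until a newline, then emit the buffered line
    token_buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            token_buffer.append((token_class, part))
            yield token_buffer
            token_buffer = []
        token_buffer.append((token_class, parts[-1]))

    if token_buffer:
        yield token_buffer
    elif content:
        # tokenization produced nothing; emit the raw content as a single plain line
        yield [('', content)]

print(list(split_token_stream([('t', 'some\ntext')], u'some\ntext')))
# normal path: [[('t', 'some')], [('t', 'text')]]
print(list(split_token_stream([], u'\ufeff')))
# fallback path: [[('', u'\ufeff')]]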
@@ -721,7 +727,11 @@ class DiffSet(object):
         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
-        return self.highlighted_filenodes[filenode][line_number - 1]
+
+        try:
+            return self.highlighted_filenodes[filenode][line_number - 1]
+        except Exception:
+            return [('', u'rhodecode diff rendering error')]
 
     def action_to_op(self, action):
         return {
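The DiffSet change above follows the same defensive idea: tokenized lines are still computed once per filenode and cached, but a failing per-line lookup now degrades to a one-token placeholder line instead of raising and breaking the whole diff rendering. A hedged sketch of that pattern, using illustrative names (LineTokenCache, get_line_tokens, tokenize) rather than the actual DiffSet API:

class LineTokenCache(object):
    """Cache token lines per file; degrade gracefully when a line cannot be resolved."""

    def __init__(self, tokenize):
        # tokenize: callable taking a filenode and returning a list of token lines
        self._tokenize = tokenize
        self._cache = {}

    def get_line_tokens(self, filenode, line_number):
        if filenode not in self._cache:
            self._cache[filenode] = self._tokenize(filenode)
        try:
            # line_number is 1-based, as in the hunk above
            return self._cache[filenode][line_number - 1]
        except Exception:
            # mirror the hunk: render a marker line rather than raise
            return [('', u'rhodecode diff rendering error')]

cache = LineTokenCache(lambda filenode: [[('t', u'line one')], [('t', u'line two')]])
cache.get_line_tokens('file.txt', 2)    # [('t', u'line two')]
cache.get_line_tokens('file.txt', 999)  # [('', u'rhodecode diff rendering error')]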
@@ -81,8 +81,9 @@ class TestTokenizeString(object):
 class TestSplitTokenStream(object):
 
     def test_split_token_stream(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')]))
+        tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', u'some')],
@@ -91,18 +92,18 @@ class TestSplitTokenStream(object):
         ]
 
     def test_split_token_stream_single(self):
-        lines = list(split_token_stream(
-            [('type1', '\n')]))
-
+        tokens = [('type1', '\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
         ]
 
     def test_split_token_stream_single_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n\n')]))
-
+        tokens = [('type1', '\n\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -111,9 +112,10 @@ class TestSplitTokenStream(object):
         ]
 
     def test_split_token_stream_multiple_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')]))
+        tokens = [('type1', '\n\n'), ('type2', '\n\n')]
+        content = [x + y for x, y in tokens]
 
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -122,6 +124,27 @@ class TestSplitTokenStream(object):
             [('type2', '')],
         ]
 
+    def test_no_tokens_by_content(self):
+        tokens = []
+        content = u'\ufeff'
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', content)],
+        ]
+
+    def test_no_tokens_by_valid_content(self):
+        from pygments.lexers.css import CssLexer
+        content = u'\ufeff table.dataTable'
+        tokens = tokenize_string(content, CssLexer())
+
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', u' '),
+             ('nt', u'table'),
+             ('p', u'.'),
+             ('nc', u'dataTable')],
+        ]
+
 
 class TestRollupTokens(object):
 
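For reviewers who prefer a single parametrized case, the two ends of the new behaviour (normal splitting vs. the empty-tokenization fallback) could also be sketched as below; the import path is an assumption, since the diff does not show file names, so adjust it to wherever split_token_stream actually lives:

import pytest

from rhodecode.lib.codeblocks import split_token_stream  # assumed module path


@pytest.mark.parametrize('tokens, content, expected', [
    # normal path: tokens are split on newlines, content is only a fallback
    ([('t', 'a\nb')], u'a\nb', [[('t', 'a')], [('t', 'b')]]),
    # fallback path: no tokens at all, the raw content becomes one plain-text line
    ([], u'\ufeff', [[('', u'\ufeff')]]),
])
def test_split_token_stream_fallback(tokens, content, expected):
    assert list(split_token_stream(tokens, content)) == expected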