@@ -49,8 +49,9 @@ def filenode_as_lines_tokens(filenode, l
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
-    tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens)
+    content = filenode.content
+    tokens = tokenize_string(content, lexer)
+    lines = split_token_stream(tokens, content)
     rv = list(lines)
     return rv
 
@@ -74,7 +75,7 @@ def tokenize_string(content, lexer):
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens):
+def split_token_stream(tokens, content):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
@@ -83,18 +84,23 @@ def split_token_stream(tokens):
      (TEXT, 'more'), (TEXT, 'text')]
     """
 
-    buffer = []
+    token_buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split('\n')
         for part in parts[:-1]:
-            buffer.append((token_class, part))
-            yield buffer
-            buffer = []
+            token_buffer.append((token_class, part))
+            yield token_buffer
+            token_buffer = []
+
+        token_buffer.append((token_class, parts[-1]))
 
-        buffer.append((token_class, parts[-1]))
-
-    if buffer:
-        yield buffer
+    if token_buffer:
+        yield token_buffer
+    elif content:
+        # this is a special case, we have the content, but tokenization didn't produce
+        # any results. This can happen if known file extensions like .css have some bogus
+        # unicode content without any newline characters
+        yield [(pygment_token_class(Token.Text), content)]
 
 
 def filenode_as_annotated_lines_tokens(filenode):
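
Taken together, the hunks above change split_token_stream() so that a file whose
tokenization yields no tokens at all (for example a .css file containing only a BOM
and no newline) still renders as a single plain-text line instead of disappearing.
Below is a self-contained sketch of the new behaviour; the pygment_token_class()
stand-in is a simplification built on Pygments' public STANDARD_TYPES mapping, not
the project's own helper, and the splitter body simply mirrors the patched function.

from pygments.token import Token, STANDARD_TYPES


def pygment_token_class(token_type):
    # simplified stand-in: short CSS class name for a pygments token type
    return STANDARD_TYPES.get(token_type, '')


def split_token_stream(tokens, content):
    # mirrors the patched function in the hunk above
    token_buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            token_buffer.append((token_class, part))
            yield token_buffer
            token_buffer = []

        token_buffer.append((token_class, parts[-1]))

    if token_buffer:
        yield token_buffer
    elif content:
        # tokenization produced nothing but the file has content: emit it as a
        # single plain-text line so the renderer still has something to show
        yield [(pygment_token_class(Token.Text), content)]


print(list(split_token_stream([], u'\ufeff')))
# -> [[('', '\ufeff')]]
print(list(split_token_stream([('', u'a\nb')], u'a\nb')))
# -> [[('', 'a')], [('', 'b')]]
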
@@ -721,7 +727,11 @@ class DiffSet(object):
         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
+
+        try:
             return self.highlighted_filenodes[filenode][line_number - 1]
+        except Exception:
+            return [('', u'rhodecode diff rendering error')]
 
     def action_to_op(self, action):
         return {
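
The try/except added above is purely a rendering guard: if the requested line number
falls outside the cached token lines (or the lookup fails for any other reason), the
diff page degrades to a placeholder token instead of raising an IndexError. A minimal
stand-alone sketch of that guard; get_cached_line() is a hypothetical name used only
for illustration, not the project's method:

def get_cached_line(tokenized_lines, line_number):
    # mirror of the guard in the hunk above
    try:
        return tokenized_lines[line_number - 1]
    except Exception:
        # out-of-range line numbers, or any other lookup failure, degrade to a
        # single placeholder token instead of breaking the whole diff page
        return [('', u'rhodecode diff rendering error')]


print(get_cached_line([[('', u'line one')]], 1))   # -> [('', 'line one')]
print(get_cached_line([[('', u'line one')]], 99))  # -> [('', 'rhodecode diff rendering error')]
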
@@ -81,8 +81,9 @@ class TestTokenizeString(object):
 class TestSplitTokenStream(object):
 
     def test_split_token_stream(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')]))
+        tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
 
         assert lines == [
             [('type1', u'some')],
@@ -91,18 +92,18 @@ class TestSplitTokenStream(object):
         ]
 
     def test_split_token_stream_single(self):
-        lines = list(split_token_stream(
-            [('type1', '\n')]))
-
+        tokens = [('type1', '\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
         ]
 
     def test_split_token_stream_single_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n\n')]))
-
+        tokens = [('type1', '\n\n\n')]
+        content = [x + y for x, y in tokens]
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
@@ -111,9 +112,10 @@ class TestSplitTokenStream(object):
         ]
 
     def test_split_token_stream_multiple_repeat(self):
-        lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')]))
+        tokens = [('type1', '\n\n'), ('type2', '\n\n')]
+        content = [x + y for x, y in tokens]
 
+        lines = list(split_token_stream(tokens, content))
         assert lines == [
             [('type1', '')],
             [('type1', '')],
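
A note on the content = [x + y for x, y in tokens] pattern used in the refactored
tests above: split_token_stream() only consults content when the token stream
produces nothing, so these tests just need a value that is truthy exactly when
tokens is non-empty. A list of the re-joined tuples does that, even though it is
not the real file text; a tiny illustration in plain Python:

tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
content = [x + y for x, y in tokens]
assert content == ['type1some\ntext', 'type2more\n']  # a list, not the file text
assert bool(content) == bool(tokens)                   # truthiness is all that matters here
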
@@ -122,6 +124,27 @@ class TestSplitTokenStream(object):
             [('type2', '')],
         ]
 
+    def test_no_tokens_by_content(self):
+        tokens = []
+        content = u'\ufeff'
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', content)],
+        ]
+
+    def test_no_tokens_by_valid_content(self):
+        from pygments.lexers.css import CssLexer
+        content = u'\ufeff table.dataTable'
+        tokens = tokenize_string(content, CssLexer())
+
+        lines = list(split_token_stream(tokens, content))
+        assert lines == [
+            [('', u' '),
+             ('nt', u'table'),
+             ('p', u'.'),
+             ('nc', u'dataTable')],
+        ]
+
 
 class TestRollupTokens(object):
 
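
The expected classes in the last new test ('nt', 'p', 'nc') are Pygments' short CSS
class names for the token types CssLexer emits for "table.dataTable"; the leading
BOM does not show up in the expected tokens (Pygments drops a leading BOM when
lexing), which is why only the space survives as a plain-text token. A quick
stand-alone check, assuming the short-name mapping from Pygments' public
pygments.token.STANDARD_TYPES (rather than the project's pygment_token_class helper)
and the lexer behaviour of the Pygments version the test was written against:

from pygments.lexers.css import CssLexer
from pygments.token import STANDARD_TYPES

for token_type, text in CssLexer().get_tokens(u'table.dataTable'):
    print(STANDARD_TYPES.get(token_type, ''), repr(text))

# expected output with that Pygments version, roughly:
#   nt 'table'
#   p '.'
#   nc 'dataTable'
# plus a final text/whitespace token for the '\n' that get_tokens() appends
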