codeblocks: add new code token rendering function that...
dan
r1025:8ba7d016 default
@@ -0,0 +1,330 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2016-2016 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21 import pytest
22
23 from rhodecode.lib.codeblocks import (
24 tokenize_string, split_token_stream, rollup_tokenstream,
25 render_tokenstream)
26 from pygments.lexers import get_lexer_by_name
27
28
29 class TestTokenizeString(object):
30
31 python_code = '''
32 import this
33
34 var = 6
35 print "this"
36
37 '''
38
39 def test_tokenize_as_python(self):
40 lexer = get_lexer_by_name('python')
41 tokens = list(tokenize_string(self.python_code, lexer))
42
43 assert tokens == [
44 ('', u'\n'),
45 ('', u' '),
46 ('kn', u'import'),
47 ('', u' '),
48 ('nn', u'this'),
49 ('', u'\n'),
50 ('', u'\n'),
51 ('', u' '),
52 ('n', u'var'),
53 ('', u' '),
54 ('o', u'='),
55 ('', u' '),
56 ('mi', u'6'),
57 ('', u'\n'),
58 ('', u' '),
59 ('k', u'print'),
60 ('', u' '),
61 ('s2', u'"'),
62 ('s2', u'this'),
63 ('s2', u'"'),
64 ('', u'\n'),
65 ('', u'\n'),
66 ('', u' ')
67 ]
68
69 def test_tokenize_as_text(self):
70 lexer = get_lexer_by_name('text')
71 tokens = list(tokenize_string(self.python_code, lexer))
72
73 assert tokens == [
74 ('',
75 u'\n import this\n\n var = 6\n print "this"\n\n ')
76 ]
77
78
79 class TestSplitTokenStream(object):
80
81 def test_split_token_stream(self):
82 lines = list(split_token_stream(
83 [('type1', 'some\ntext'), ('type2', 'more\n')]))
84
85 assert lines == [
86 [('type1', u'some')],
87 [('type1', u'text'), ('type2', u'more')],
88 [('type2', u'')],
89 ]
90
91 def test_split_token_stream_other_char(self):
92 lines = list(split_token_stream(
93 [('type1', 'some\ntext'), ('type2', 'more\n')],
94 split_string='m'))
95
96 assert lines == [
97 [('type1', 'so')],
98 [('type1', 'e\ntext'), ('type2', '')],
99 [('type2', 'ore\n')],
100 ]
101
102 def test_split_token_stream_without_char(self):
103 lines = list(split_token_stream(
104 [('type1', 'some\ntext'), ('type2', 'more\n')],
105 split_string='z'))
106
107 assert lines == [
108 [('type1', 'some\ntext'), ('type2', 'more\n')]
109 ]
110
111 def test_split_token_stream_single(self):
112 lines = list(split_token_stream(
113 [('type1', '\n')], split_string='\n'))
114
115 assert lines == [
116 [('type1', '')],
117 [('type1', '')],
118 ]
119
120 def test_split_token_stream_single_repeat(self):
121 lines = list(split_token_stream(
122 [('type1', '\n\n\n')], split_string='\n'))
123
124 assert lines == [
125 [('type1', '')],
126 [('type1', '')],
127 [('type1', '')],
128 [('type1', '')],
129 ]
130
131 def test_split_token_stream_multiple_repeat(self):
132 lines = list(split_token_stream(
133 [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
134
135 assert lines == [
136 [('type1', '')],
137 [('type1', '')],
138 [('type1', ''), ('type2', '')],
139 [('type2', '')],
140 [('type2', '')],
141 ]
142
143
144 class TestRollupTokens(object):
145
146 @pytest.mark.parametrize('tokenstream,output', [
147 ([],
148 []),
149 ([('A', 'hell'), ('A', 'o')], [
150 ('A', [
151 ('', 'hello')]),
152 ]),
153 ([('A', 'hell'), ('B', 'o')], [
154 ('A', [
155 ('', 'hell')]),
156 ('B', [
157 ('', 'o')]),
158 ]),
159 ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
160 ('A', [
161 ('', 'hello')]),
162 ('B', [
163 ('', ' ')]),
164 ('A', [
165 ('', 'there')]),
166 ]),
167 ])
168 def test_rollup_tokenstream_without_ops(self, tokenstream, output):
169 assert list(rollup_tokenstream(tokenstream)) == output
170
171 @pytest.mark.parametrize('tokenstream,output', [
172 ([],
173 []),
174 ([('A', '', 'hell'), ('A', '', 'o')], [
175 ('A', [
176 ('', 'hello')]),
177 ]),
178 ([('A', '', 'hell'), ('B', '', 'o')], [
179 ('A', [
180 ('', 'hell')]),
181 ('B', [
182 ('', 'o')]),
183 ]),
184 ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
185 ('A', [
186 ('', 'h')]),
187 ('B', [
188 ('', 'e')]),
189 ('C', [
190 ('', 'y')]),
191 ]),
192 ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
193 ('A', [
194 ('', 'he')]),
195 ('C', [
196 ('', 'y')]),
197 ]),
198 ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
199 ('A', [
200 ('ins', 'he')
201 ]),
202 ]),
203 ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
204 ('A', [
205 ('ins', 'h'),
206 ('del', 'e')
207 ]),
208 ]),
209 ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
210 ('A', [
211 ('ins', 'h'),
212 ]),
213 ('B', [
214 ('del', 'ey'),
215 ]),
216 ]),
217 ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
218 ('A', [
219 ('ins', 'h'),
220 ('del', 'e'),
221 ]),
222 ('B', [
223 ('del', 'y'),
224 ]),
225 ]),
226 ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
227 ('A', [
228 ('', 'some'),
229 ('ins', 'new'),
230 ('', 'name'),
231 ]),
232 ]),
233 ])
234 def test_rollup_tokenstream_with_ops(self, tokenstream, output):
235 assert list(rollup_tokenstream(tokenstream)) == output
236
237
238 class TestRenderTokenStream(object):
239
240 @pytest.mark.parametrize('tokenstream,output', [
241 (
242 [],
243 '',
244 ),
245 (
246 [('', '', u'')],
247 '<span></span>',
248 ),
249 (
250 [('', '', u'text')],
251 '<span>text</span>',
252 ),
253 (
254 [('A', '', u'')],
255 '<span class="A"></span>',
256 ),
257 (
258 [('A', '', u'hello')],
259 '<span class="A">hello</span>',
260 ),
261 (
262 [('A', '', u'hel'), ('A', '', u'lo')],
263 '<span class="A">hello</span>',
264 ),
265 (
266 [('A', '', u'two\n'), ('A', '', u'lines')],
267 '<span class="A">two<nl>\n</nl>lines</span>',
268 ),
269 (
270 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
271 '<span class="A"><nl>\n</nl>three<nl>\n</nl>lines</span>',
272 ),
273 (
274 [('', '', u'\n'), ('A', '', u'line')],
275 '<span><nl>\n</nl></span><span class="A">line</span>',
276 ),
277 (
278 [('', 'ins', u'\n'), ('A', '', u'line')],
279 '<span><ins><nl>\n</nl></ins></span><span class="A">line</span>',
280 ),
281 (
282 [('A', '', u'hel'), ('A', 'ins', u'lo')],
283 '<span class="A">hel<ins>lo</ins></span>',
284 ),
285 (
286 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
287 '<span class="A">hel<ins>lo</ins></span>',
288 ),
289 (
290 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
291 '<span class="A">hel<ins>l</ins><del>o</del></span>',
292 ),
293 (
294 [('A', '', u'hel'), ('B', '', u'lo')],
295 '<span class="A">hel</span><span class="B">lo</span>',
296 ),
297 (
298 [('A', '', u'hel'), ('B', 'ins', u'lo')],
299 '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
300 ),
301 ])
302 def test_render_tokenstream_with_ops(self, tokenstream, output):
303 html = render_tokenstream(tokenstream)
304 assert html == output
305
306 @pytest.mark.parametrize('tokenstream,output', [
307 (
308 [('A', u'hel'), ('A', u'lo')],
309 '<span class="A">hello</span>',
310 ),
311 (
312 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
313 '<span class="A">hello</span>',
314 ),
315 (
316 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
317 '<span class="A">hello</span>',
318 ),
319 (
320 [('A', u'hel'), ('B', u'lo')],
321 '<span class="A">hel</span><span class="B">lo</span>',
322 ),
323 (
324 [('A', u'hel'), ('B', u'lo')],
325 '<span class="A">hel</span><span class="B">lo</span>',
326 ),
327 ])
328 def test_render_tokenstream_without_ops(self, tokenstream, output):
329 html = render_tokenstream(tokenstream)
330 assert html == output
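
For orientation, the helpers exercised by these tests compose into a small pipeline: tokenize the content, split the token stream into lines, then render each line to HTML. A minimal sketch of that flow (the sample source and lexer name are illustrative only, not taken from this commit):

    from pygments.lexers import get_lexer_by_name
    from rhodecode.lib.codeblocks import (
        tokenize_string, split_token_stream, render_tokenstream)

    # tokenize a small snippet into (css_class, text) pairs
    lexer = get_lexer_by_name('python')
    tokens = tokenize_string('var = 6\nprint "var"\n', lexer)

    # split the stream on newlines into per-line token lists
    lines = split_token_stream(tokens, split_string='\n')

    # render each line to the <span>-based markup asserted in the tests above
    rendered = [render_tokenstream(line_tokens) for line_tokens in lines]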
@@ -18,16 +18,33 b''
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21
21 import logging
22 22 from itertools import groupby
23 23
24 24 from pygments import lex
25 # PYGMENTS_TOKEN_TYPES is used in a hot loop keep attribute lookups to a minimum
26 from pygments.token import STANDARD_TYPES as PYGMENTS_TOKEN_TYPES
25 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
27 from rhodecode.lib.utils2 import AttributeDict
28 from rhodecode.lib.vcs.nodes import FileNode
29 from pygments.lexers import get_lexer_by_name
30
31 plain_text_lexer = get_lexer_by_name(
32 'text', stripall=False, stripnl=False, ensurenl=False)
33
34
35 log = logging.getLogger()
27 36
28 from rhodecode.lib.helpers import get_lexer_for_filenode
29 37
30 def tokenize_file(content, lexer):
38 def filenode_as_lines_tokens(filenode, lexer=None):
39 lexer = lexer or get_lexer_for_filenode(filenode)
40 log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
41 tokens = tokenize_string(filenode.content, lexer)
42 lines = split_token_stream(tokens, split_string='\n')
43 rv = list(lines)
44 return rv
45
46
47 def tokenize_string(content, lexer):
31 48 """
32 49 Use pygments to tokenize some content based on a lexer
33 50 ensuring all original new lines and whitespace is preserved
@@ -36,65 +53,33 b' def tokenize_file(content, lexer):'
36 53 lexer.stripall = False
37 54 lexer.stripnl = False
38 55 lexer.ensurenl = False
39 return lex(content, lexer)
56 for token_type, token_text in lex(content, lexer):
57 yield pygment_token_class(token_type), token_text
40 58
41 59
42 def pygment_token_class(token_type):
43 """ Convert a pygments token type to html class name """
44
45 fname = PYGMENTS_TOKEN_TYPES.get(token_type)
46 if fname:
47 return fname
48
49 aname = ''
50 while fname is None:
51 aname = '-' + token_type[-1] + aname
52 token_type = token_type.parent
53 fname = PYGMENTS_TOKEN_TYPES.get(token_type)
54
55 return fname + aname
56
57
58 def tokens_as_lines(tokens, split_string=u'\n'):
60 def split_token_stream(tokens, split_string=u'\n'):
59 61 """
60 62 Take a list of (TokenType, text) tuples and split them by a string
61 63
62 eg. [(TEXT, 'some\ntext')] => [(TEXT, 'some'), (TEXT, 'text')]
64 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
65 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
66 [(TEXT, '')]]
63 67 """
64 68
65 69 buffer = []
66 for token_type, token_text in tokens:
70 for token_class, token_text in tokens:
67 71 parts = token_text.split(split_string)
68 72 for part in parts[:-1]:
69 buffer.append((token_type, part))
73 buffer.append((token_class, part))
70 74 yield buffer
71 75 buffer = []
72 76
73 buffer.append((token_type, parts[-1]))
77 buffer.append((token_class, parts[-1]))
74 78
75 79 if buffer:
76 80 yield buffer
77 81
78 82
79 def filenode_as_lines_tokens(filenode):
80 """
81 Return a generator of lines with pygment tokens for a filenode eg:
82
83 [
84 (1, line1_tokens_list),
85 (2, line1_tokens_list]),
86 ]
87 """
88
89 return enumerate(
90 tokens_as_lines(
91 tokenize_file(
92 filenode.content, get_lexer_for_filenode(filenode)
93 )
94 ),
95 1)
96
97
98 83 def filenode_as_annotated_lines_tokens(filenode):
99 84 """
100 85 Take a file node and return a list of annotations => lines, if no annotation
@@ -120,9 +105,8 b' def filenode_as_annotated_lines_tokens(f'
120 105 ]
121 106 """
122 107
108 commit_cache = {} # cache commit_getter lookups
123 109
124 # cache commit_getter lookups
125 commit_cache = {}
126 110 def _get_annotation(commit_id, commit_getter):
127 111 if commit_id not in commit_cache:
128 112 commit_cache[commit_id] = commit_getter()
@@ -136,7 +120,7 b' def filenode_as_annotated_lines_tokens(f'
136 120
137 121 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
138 122 for line_no, tokens
139 in filenode_as_lines_tokens(filenode))
123 in enumerate(filenode_as_lines_tokens(filenode), 1))
140 124
141 125 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
142 126
@@ -145,3 +129,86 b' def filenode_as_annotated_lines_tokens(f'
145 129 annotation, [(line_no, tokens)
146 130 for (_, line_no, tokens) in group]
147 131 )
132
133
134 def render_tokenstream(tokenstream):
135 result = []
136 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
137
138 if token_class:
139 result.append(u'<span class="%s">' % token_class)
140 else:
141 result.append(u'<span>')
142
143 for op_tag, token_text in token_ops_texts:
144
145 if op_tag:
146 result.append(u'<%s>' % op_tag)
147
148 escaped_text = html_escape(token_text)
149 escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
150
151 result.append(escaped_text)
152
153 if op_tag:
154 result.append(u'</%s>' % op_tag)
155
156 result.append(u'</span>')
157
158 html = ''.join(result)
159 return html
160
161
162 def rollup_tokenstream(tokenstream):
163 """
164 Group a token stream of the format:
165
166 ('class', 'op', 'text')
167 or
168 ('class', 'text')
169
170 into
171
172 [('class1',
173 [('op1', 'text'),
174 ('op2', 'text')]),
175 ('class2',
176 [('op3', 'text')])]
177
178 This is used to produce the minimal tags necessary when
179 rendering to html, e.g. for a token stream:
180
181 <span class="A"><ins>he</ins>llo</span>
182 vs
183 <span class="A"><ins>he</ins></span><span class="A">llo</span>
184
185 If a 2-tuple is passed in, the output op will be an empty string.
186
187 eg:
188
189 >>> rollup_tokenstream([('classA', '', 'h'),
190 ('classA', 'del', 'ell'),
191 ('classA', '', 'o'),
192 ('classB', '', ' '),
193 ('classA', '', 'the'),
194 ('classA', '', 're'),
195 ])
196
197 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
198 ('classB', [('', ' ')]),
199 ('classA', [('', 'there')])]
200
201 """
202 if tokenstream and len(tokenstream[0]) == 2:
203 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
204
205 result = []
206 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
207 ops = []
208 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
209 text_buffer = []
210 for t_class, t_op, t_text in token_text_list:
211 text_buffer.append(t_text)
212 ops.append((token_op, ''.join(text_buffer)))
213 result.append((token_class, ops))
214 return result
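
To make the "minimal tags" point above concrete: adjacent tokens that share both a class and an op are coalesced into a single span and a single op tag, rather than one element per token. The expected value below is taken directly from the new tests in this commit:

    from rhodecode.lib.codeblocks import render_tokenstream

    tokens = [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')]
    # the two 'ins' fragments collapse into one <ins> inside one <span class="A">
    assert render_tokenstream(tokens) == '<span class="A">hel<ins>lo</ins></span>'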
@@ -644,6 +644,9 b' pre.literal-block, .codehilite pre{'
644 644
645 645 /* START NEW CODE BLOCK CSS */
646 646
647 @cb-line-height: 18px;
648 @cb-line-code-padding: 10px;
649
647 650 table.cb {
648 651 width: 100%;
649 652 border-collapse: collapse;
@@ -678,21 +681,23 b' table.cb {'
678 681
679 682 td {
680 683 vertical-align: top;
681 padding: 2px 10px;
684 padding: 0;
682 685
683 686 &.cb-content {
687 font-size: 12.35px;
688
689 span.cb-code {
690 line-height: @cb-line-height;
691 padding-left: @cb-line-code-padding;
692 display: block;
684 693 white-space: pre-wrap;
685 694 font-family: @font-family-monospace;
686 font-size: 12.35px;
687
688 span {
689 695 word-break: break-word;
690 696 }
691 697 }
692 698
693 699 &.cb-lineno {
694 700 padding: 0;
695 height: 1px; /* this allows the <a> link to fill to 100% height of the td */
696 701 width: 50px;
697 702 color: rgba(0, 0, 0, 0.3);
698 703 text-align: right;
@@ -702,21 +707,20 b' table.cb {'
702 707 a::before {
703 708 content: attr(data-line-no);
704 709 }
705 &.cb-line-selected {
710 &.cb-line-selected a {
706 711 background: @comment-highlight-color !important;
707 712 }
708 713
709 714 a {
710 715 display: block;
711 height: 100%;
716 padding-right: @cb-line-code-padding;
717 line-height: @cb-line-height;
712 718 color: rgba(0, 0, 0, 0.3);
713 padding: 0 10px; /* vertical padding is 0 so that height: 100% works */
714 line-height: 18px; /* use this instead of vertical padding */
715 719 }
716 720 }
717 721
718 722 &.cb-content {
719 &.cb-line-selected {
723 &.cb-line-selected .cb-code {
720 724 background: @comment-highlight-color !important;
721 725 }
722 726 }
@@ -2,9 +2,9 b''
2 2 annotation=None,
3 3 bgcolor=None)">
4 4 <%
5 # avoid module lookups for performance
6 from rhodecode.lib.codeblocks import pygment_token_class
7 from rhodecode.lib.helpers import html_escape
5 from rhodecode.lib.codeblocks import render_tokenstream
6 # avoid module lookup for performance
7 html_escape = h.html_escape
8 8 %>
9 9 <tr class="cb-line cb-line-fresh"
10 10 %if annotation:
@@ -18,13 +18,11 b''
18 18 %if bgcolor:
19 19 style="background: ${bgcolor}"
20 20 %endif
21 >${
22 ''.join(
23 '<span class="%s">%s</span>' %
24 (pygment_token_class(token_type), html_escape(token_text))
25 for token_type, token_text in tokens) + '\n' | n
26 }</td>
27 ## this ugly list comp is necessary for performance
21 >
22 ## newline at end is necessary for highlight to work when line is empty
23 ## and for copy pasting code to work as expected
24 <span class="cb-code">${render_tokenstream(tokens)|n}${'\n'}</span>
25 </td>
28 26 </tr>
29 27 </%def>
30 28
@@ -62,7 +62,7 b''
62 62 ${sourceblock.render_annotation_lines(annotation, lines, color_hasher)}
63 63 %endfor
64 64 %else:
65 %for line_num, tokens in c.lines:
65 %for line_num, tokens in enumerate(c.lines, 1):
66 66 ${sourceblock.render_line(line_num, tokens)}
67 67 %endfor
68 68 %endif
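
With the template change above, line numbers are now added at render time via enumerate, so c.lines only needs to hold the per-line token lists returned by filenode_as_lines_tokens. A hypothetical controller-side sketch (the variable wiring is assumed, not shown in this commit):

    from rhodecode.lib.codeblocks import filenode_as_lines_tokens

    # filenode is the file being viewed; c is the Mako template context
    c.lines = filenode_as_lines_tokens(filenode)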