codeblocks: add new code token rendering function that...
dan - r1025:8ba7d016 default
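For context, the commit wires four helpers together: tokenize_string produces (css_class, text) pairs, split_token_stream regroups them into lines, rollup_tokenstream merges adjacent tokens, and render_tokenstream emits HTML. A minimal sketch of the pipeline, assuming the import paths added in this commit (the exact class names emitted depend on the installed pygments version):

    from pygments.lexers import get_lexer_by_name
    from rhodecode.lib.codeblocks import (
        tokenize_string, split_token_stream, render_tokenstream)

    lexer = get_lexer_by_name('python')
    # (css_class, text) pairs, whitespace preserved
    tokens = tokenize_string(u'var = 6\n', lexer)
    for line_tokens in split_token_stream(tokens, split_string=u'\n'):
        # each line renders to minimal <span>/<ins>/<del> markup
        print(render_tokenstream(line_tokens))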
@@ -0,0 +1,330 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2016-2016 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import pytest

from rhodecode.lib.codeblocks import (
    tokenize_string, split_token_stream, rollup_tokenstream,
    render_tokenstream)
from pygments.lexers import get_lexer_by_name


class TestTokenizeString(object):

    python_code = '''
    import this

    var = 6
    print "this"

    '''

    def test_tokenize_as_python(self):
        lexer = get_lexer_by_name('python')
        tokens = list(tokenize_string(self.python_code, lexer))

        assert tokens == [
            ('', u'\n'),
            ('', u'    '),
            ('kn', u'import'),
            ('', u' '),
            ('nn', u'this'),
            ('', u'\n'),
            ('', u'\n'),
            ('', u'    '),
            ('n', u'var'),
            ('', u' '),
            ('o', u'='),
            ('', u' '),
            ('mi', u'6'),
            ('', u'\n'),
            ('', u'    '),
            ('k', u'print'),
            ('', u' '),
            ('s2', u'"'),
            ('s2', u'this'),
            ('s2', u'"'),
            ('', u'\n'),
            ('', u'\n'),
            ('', u'    ')
        ]

    def test_tokenize_as_text(self):
        lexer = get_lexer_by_name('text')
        tokens = list(tokenize_string(self.python_code, lexer))

        assert tokens == [
            ('',
             u'\n    import this\n\n    var = 6\n    print "this"\n\n    ')
        ]


class TestSplitTokenStream(object):

    def test_split_token_stream(self):
        lines = list(split_token_stream(
            [('type1', 'some\ntext'), ('type2', 'more\n')]))

        assert lines == [
            [('type1', u'some')],
            [('type1', u'text'), ('type2', u'more')],
            [('type2', u'')],
        ]

    def test_split_token_stream_other_char(self):
        lines = list(split_token_stream(
            [('type1', 'some\ntext'), ('type2', 'more\n')],
            split_string='m'))

        assert lines == [
            [('type1', 'so')],
            [('type1', 'e\ntext'), ('type2', '')],
            [('type2', 'ore\n')],
        ]

    def test_split_token_stream_without_char(self):
        lines = list(split_token_stream(
            [('type1', 'some\ntext'), ('type2', 'more\n')],
            split_string='z'))

        assert lines == [
            [('type1', 'some\ntext'), ('type2', 'more\n')]
        ]

    def test_split_token_stream_single(self):
        lines = list(split_token_stream(
            [('type1', '\n')], split_string='\n'))

        assert lines == [
            [('type1', '')],
            [('type1', '')],
        ]

    def test_split_token_stream_single_repeat(self):
        lines = list(split_token_stream(
            [('type1', '\n\n\n')], split_string='\n'))

        assert lines == [
            [('type1', '')],
            [('type1', '')],
            [('type1', '')],
            [('type1', '')],
        ]

    def test_split_token_stream_multiple_repeat(self):
        lines = list(split_token_stream(
            [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))

        assert lines == [
            [('type1', '')],
            [('type1', '')],
            [('type1', ''), ('type2', '')],
            [('type2', '')],
            [('type2', '')],
        ]


class TestRollupTokens(object):

    @pytest.mark.parametrize('tokenstream,output', [
        ([],
            []),
        ([('A', 'hell'), ('A', 'o')], [
            ('A', [
                ('', 'hello')]),
        ]),
        ([('A', 'hell'), ('B', 'o')], [
            ('A', [
                ('', 'hell')]),
            ('B', [
                ('', 'o')]),
        ]),
        ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
            ('A', [
                ('', 'hello')]),
            ('B', [
                ('', ' ')]),
            ('A', [
                ('', 'there')]),
        ]),
    ])
    def test_rollup_tokenstream_without_ops(self, tokenstream, output):
        assert list(rollup_tokenstream(tokenstream)) == output

    @pytest.mark.parametrize('tokenstream,output', [
        ([],
            []),
        ([('A', '', 'hell'), ('A', '', 'o')], [
            ('A', [
                ('', 'hello')]),
        ]),
        ([('A', '', 'hell'), ('B', '', 'o')], [
            ('A', [
                ('', 'hell')]),
            ('B', [
                ('', 'o')]),
        ]),
        ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
            ('A', [
                ('', 'h')]),
            ('B', [
                ('', 'e')]),
            ('C', [
                ('', 'y')]),
        ]),
        ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
            ('A', [
                ('', 'he')]),
            ('C', [
                ('', 'y')]),
        ]),
        ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
            ('A', [
                ('ins', 'he')
            ]),
        ]),
        ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
            ('A', [
                ('ins', 'h'),
                ('del', 'e')
            ]),
        ]),
        ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
            ('A', [
                ('ins', 'h'),
            ]),
            ('B', [
                ('del', 'ey'),
            ]),
        ]),
        ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
            ('A', [
                ('ins', 'h'),
                ('del', 'e'),
            ]),
            ('B', [
                ('del', 'y'),
            ]),
        ]),
        ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
            ('A', [
                ('', 'some'),
                ('ins', 'new'),
                ('', 'name'),
            ]),
        ]),
    ])
    def test_rollup_tokenstream_with_ops(self, tokenstream, output):
        assert list(rollup_tokenstream(tokenstream)) == output


class TestRenderTokenStream(object):

    @pytest.mark.parametrize('tokenstream,output', [
        (
            [],
            '',
        ),
        (
            [('', '', u'')],
            '<span></span>',
        ),
        (
            [('', '', u'text')],
            '<span>text</span>',
        ),
        (
            [('A', '', u'')],
            '<span class="A"></span>',
        ),
        (
            [('A', '', u'hello')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', '', u'hel'), ('A', '', u'lo')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', '', u'two\n'), ('A', '', u'lines')],
            '<span class="A">two<nl>\n</nl>lines</span>',
        ),
        (
            [('A', '', u'\nthree\n'), ('A', '', u'lines')],
            '<span class="A"><nl>\n</nl>three<nl>\n</nl>lines</span>',
        ),
        (
            [('', '', u'\n'), ('A', '', u'line')],
            '<span><nl>\n</nl></span><span class="A">line</span>',
        ),
        (
            [('', 'ins', u'\n'), ('A', '', u'line')],
            '<span><ins><nl>\n</nl></ins></span><span class="A">line</span>',
        ),
        (
            [('A', '', u'hel'), ('A', 'ins', u'lo')],
            '<span class="A">hel<ins>lo</ins></span>',
        ),
        (
            [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
            '<span class="A">hel<ins>lo</ins></span>',
        ),
        (
            [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
            '<span class="A">hel<ins>l</ins><del>o</del></span>',
        ),
        (
            [('A', '', u'hel'), ('B', '', u'lo')],
            '<span class="A">hel</span><span class="B">lo</span>',
        ),
        (
            [('A', '', u'hel'), ('B', 'ins', u'lo')],
            '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
        ),
    ])
    def test_render_tokenstream_with_ops(self, tokenstream, output):
        html = render_tokenstream(tokenstream)
        assert html == output

    @pytest.mark.parametrize('tokenstream,output', [
        (
            [('A', u'hel'), ('A', u'lo')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', u'hel'), ('A', u'l'), ('A', u'o')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', u'hel'), ('A', u'l'), ('A', u'o')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', u'hel'), ('B', u'lo')],
            '<span class="A">hel</span><span class="B">lo</span>',
        ),
        (
            [('A', u'hel'), ('B', u'lo')],
            '<span class="A">hel</span><span class="B">lo</span>',
        ),
    ])
    def test_render_tokenstream_without_ops(self, tokenstream, output):
        html = render_tokenstream(tokenstream)
        assert html == output
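A note on the short class names asserted above ('kn', 'nn', 'o', 'mi', 's2', ...): they are not defined by this commit but come from pygments' standard token-to-CSS-class table, the same one `_get_ttype_class` (aliased to `pygment_token_class` in the module diff below) consults. A quick sanity check, assuming only a stock pygments install:

    from pygments.token import Token, STANDARD_TYPES

    assert STANDARD_TYPES[Token.Keyword.Namespace] == 'kn'       # 'import'
    assert STANDARD_TYPES[Token.Name.Namespace] == 'nn'          # 'this'
    assert STANDARD_TYPES[Token.Operator] == 'o'                 # '='
    assert STANDARD_TYPES[Token.Literal.Number.Integer] == 'mi'  # '6'
    assert STANDARD_TYPES[Token.Literal.String.Double] == 's2'   # '"this"'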
@@ -18,16 +18,33 @@
 # RhodeCode Enterprise Edition, including its added features, Support services,
 # and proprietary license terms, please see https://rhodecode.com/licenses/
 
-
+import logging
 from itertools import groupby
 
 from pygments import lex
-# PYGMENTS_TOKEN_TYPES is used in a hot loop keep attribute lookups to a minimum
-from pygments.token import STANDARD_TYPES as PYGMENTS_TOKEN_TYPES
+from pygments.formatters.html import _get_ttype_class as pygment_token_class
+from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
+from rhodecode.lib.utils2 import AttributeDict
+from rhodecode.lib.vcs.nodes import FileNode
+from pygments.lexers import get_lexer_by_name
+
+plain_text_lexer = get_lexer_by_name(
+    'text', stripall=False, stripnl=False, ensurenl=False)
+
+
+log = logging.getLogger()
 
-from rhodecode.lib.helpers import get_lexer_for_filenode
 
-def tokenize_file(content, lexer):
+def filenode_as_lines_tokens(filenode, lexer=None):
+    lexer = lexer or get_lexer_for_filenode(filenode)
+    log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
+    tokens = tokenize_string(filenode.content, lexer)
+    lines = split_token_stream(tokens, split_string='\n')
+    rv = list(lines)
+    return rv
+
+
+def tokenize_string(content, lexer):
     """
     Use pygments to tokenize some content based on a lexer
     ensuring all original new lines and whitespace is preserved
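tokenize_string's docstring (continued in the next hunk, where the lexer's stripall/stripnl/ensurenl flags are forced off) promises lossless tokenization: concatenating the yielded token text reproduces the input. A minimal check of that invariant, with hypothetical content using Unix newlines (pygments always normalizes '\r\n' to '\n'):

    from pygments.lexers import get_lexer_by_name
    from rhodecode.lib.codeblocks import tokenize_string

    content = u'\n  x = 1\n\n'
    tokens = list(tokenize_string(content, get_lexer_by_name('python')))
    # stripall/stripnl/ensurenl are off, so no whitespace is lost or added
    assert u''.join(text for _cls, text in tokens) == content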
@@ -36,65 +53,33 @@ def tokenize_file(content, lexer):
     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
-    return lex(content, lexer)
+    for token_type, token_text in lex(content, lexer):
+        yield pygment_token_class(token_type), token_text
 
 
-def pygment_token_class(token_type):
-    """ Convert a pygments token type to html class name """
-
-    fname = PYGMENTS_TOKEN_TYPES.get(token_type)
-    if fname:
-        return fname
-
-    aname = ''
-    while fname is None:
-        aname = '-' + token_type[-1] + aname
-        token_type = token_type.parent
-        fname = PYGMENTS_TOKEN_TYPES.get(token_type)
-
-    return fname + aname
-
-
-def tokens_as_lines(tokens, split_string=u'\n'):
+def split_token_stream(tokens, split_string=u'\n'):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
-    eg. [(TEXT, 'some\ntext')] => [(TEXT, 'some'), (TEXT, 'text')]
+    >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
+    [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
+     [(TEXT, '')]]
     """
 
     buffer = []
-    for token_type, token_text in tokens:
+    for token_class, token_text in tokens:
         parts = token_text.split(split_string)
         for part in parts[:-1]:
-            buffer.append((token_type, part))
+            buffer.append((token_class, part))
             yield buffer
             buffer = []
 
-        buffer.append((token_type, parts[-1]))
+        buffer.append((token_class, parts[-1]))
 
     if buffer:
         yield buffer
 
 
-def filenode_as_lines_tokens(filenode):
-    """
-    Return a generator of lines with pygment tokens for a filenode eg:
-
-    [
-        (1, line1_tokens_list),
-        (2, line1_tokens_list]),
-    ]
-    """
-
-    return enumerate(
-        tokens_as_lines(
-            tokenize_file(
-                filenode.content, get_lexer_for_filenode(filenode)
-            )
-        ),
-        1)
-
-
 def filenode_as_annotated_lines_tokens(filenode):
     """
     Take a file node and return a list of annotations => lines, if no annotation
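One subtlety of split_token_stream, above, that the tests pin down: str.split returns one more part than there are separators, so a stream whose text ends with the split string yields a trailing empty line. In plain Python terms:

    # 'a\n'.split('\n') -> ['a', ''] -- a trailing newline produces an extra,
    # empty final line, exactly what test_split_token_stream_single_repeat
    # asserts
    assert u'\n\n\n'.split(u'\n') == [u'', u'', u'', u'']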
@@ -120,9 +105,8 @@ def filenode_as_annotated_lines_tokens(filenode):
     ]
     """
 
+    commit_cache = {}  # cache commit_getter lookups
 
-    # cache commit_getter lookups
-    commit_cache = {}
     def _get_annotation(commit_id, commit_getter):
         if commit_id not in commit_cache:
             commit_cache[commit_id] = commit_getter()
@@ -136,7 +120,7 @@ def filenode_as_annotated_lines_tokens(filenode):
 
     annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                          for line_no, tokens
-                         in filenode_as_lines_tokens(filenode))
+                         in enumerate(filenode_as_lines_tokens(filenode), 1))
 
     grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
 
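The grouping step leans on the fact that itertools.groupby only merges consecutive items, so annotation blocks stay in file order rather than being aggregated per commit. An illustrative sketch with placeholder data:

    from itertools import groupby

    annotated = [('c1', 1), ('c1', 2), ('c2', 3), ('c1', 4)]
    runs = [(key, [line for _, line in group])
            for key, group in groupby(annotated, lambda x: x[0])]
    # 'c1' appears twice: non-adjacent runs are not merged
    assert runs == [('c1', [1, 2]), ('c2', [3]), ('c1', [4])]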
@@ -145,3 +129,86 @@ def filenode_as_annotated_lines_tokens(filenode):
         annotation, [(line_no, tokens)
                      for (_, line_no, tokens) in group]
     )
+
+
+def render_tokenstream(tokenstream):
+    result = []
+    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
+
+        if token_class:
+            result.append(u'<span class="%s">' % token_class)
+        else:
+            result.append(u'<span>')
+
+        for op_tag, token_text in token_ops_texts:
+
+            if op_tag:
+                result.append(u'<%s>' % op_tag)
+
+            escaped_text = html_escape(token_text)
+            escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
+
+            result.append(escaped_text)
+
+            if op_tag:
+                result.append(u'</%s>' % op_tag)
+
+        result.append(u'</span>')
+
+    html = ''.join(result)
+    return html
+
+
+def rollup_tokenstream(tokenstream):
+    """
+    Group a token stream of the format:
+
+        ('class', 'op', 'text')
+    or
+        ('class', 'text')
+
+    into
+
+        [('class1',
+            [('op1', 'text'),
+             ('op2', 'text')]),
+         ('class2',
+            [('op3', 'text')])]
+
+    This is used to get the minimal tags necessary when
+    rendering to html, eg. for a token stream like:
+
+    <span class="A"><ins>he</ins>llo</span>
+    vs
+    <span class="A"><ins>he</ins></span><span class="A">llo</span>
+
+    If a 2 tuple is passed in, the output op will be an empty string.
+
+    eg:
+
+    >>> rollup_tokenstream([('classA', '', 'h'),
+                            ('classA', 'del', 'ell'),
+                            ('classA', '', 'o'),
+                            ('classB', '', ' '),
+                            ('classA', '', 'the'),
+                            ('classA', '', 're'),
+                            ])
+
+    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
+     ('classB', [('', ' ')]),
+     ('classA', [('', 'there')])]
+
+    """
+    if tokenstream and len(tokenstream[0]) == 2:
+        tokenstream = ((t[0], '', t[1]) for t in tokenstream)
+
+    result = []
+    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
+        ops = []
+        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
+            text_buffer = []
+            for t_class, t_op, t_text in token_text_list:
+                text_buffer.append(t_text)
+            ops.append((token_op, ''.join(text_buffer)))
+        result.append((token_class, ops))
+    return result
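Putting the two new functions together, the rollup step is what keeps the emitted markup minimal; a sketch mirroring one of the test cases ('A' is an arbitrary class name):

    from rhodecode.lib.codeblocks import rollup_tokenstream, render_tokenstream

    stream = [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')]
    # same class collapses to one span, same op collapses to one <ins>
    assert rollup_tokenstream(stream) == [('A', [('', u'hel'), ('ins', u'lo')])]
    assert render_tokenstream(stream) == '<span class="A">hel<ins>lo</ins></span>'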
@@ -644,6 +644,9 @@ pre.literal-block, .codehilite pre{
 
 /* START NEW CODE BLOCK CSS */
 
+@cb-line-height: 18px;
+@cb-line-code-padding: 10px;
+
 table.cb {
   width: 100%;
   border-collapse: collapse;
@@ -678,21 +681,23 @@ table.cb {
 
   td {
     vertical-align: top;
-    padding: 2px 10px;
+    padding: 0;
 
     &.cb-content {
-      white-space: pre-wrap;
-      font-family: @font-family-monospace;
      font-size: 12.35px;
 
-      span {
+      span.cb-code {
+        line-height: @cb-line-height;
+        padding-left: @cb-line-code-padding;
+        display: block;
+        white-space: pre-wrap;
+        font-family: @font-family-monospace;
        word-break: break-word;
      }
    }
 
    &.cb-lineno {
      padding: 0;
-      height: 1px; /* this allows the <a> link to fill to 100% height of the td */
      width: 50px;
      color: rgba(0, 0, 0, 0.3);
      text-align: right;
@@ -702,21 +707,20 @@ table.cb {
      a::before {
        content: attr(data-line-no);
      }
-      &.cb-line-selected {
+      &.cb-line-selected a {
        background: @comment-highlight-color !important;
      }
 
      a {
        display: block;
-        height: 100%;
+        padding-right: @cb-line-code-padding;
+        line-height: @cb-line-height;
        color: rgba(0, 0, 0, 0.3);
-        padding: 0 10px; /* vertical padding is 0 so that height: 100% works */
-        line-height: 18px; /* use this instead of vertical padding */
      }
    }
 
    &.cb-content {
-      &.cb-line-selected {
+      &.cb-line-selected .cb-code {
        background: @comment-highlight-color !important;
      }
    }
@@ -2,9 +2,9 @@
   annotation=None,
   bgcolor=None)">
 <%
-# avoid module lookups for performance
-from rhodecode.lib.codeblocks import pygment_token_class
-from rhodecode.lib.helpers import html_escape
+from rhodecode.lib.codeblocks import render_tokenstream
+# avoid module lookup for performance
+html_escape = h.html_escape
 %>
 <tr class="cb-line cb-line-fresh"
 %if annotation:
@@ -18,13 +18,11 @@
 %if bgcolor:
   style="background: ${bgcolor}"
 %endif
->${
-  ''.join(
-    '<span class="%s">%s</span>' %
-    (pygment_token_class(token_type), html_escape(token_text))
-    for token_type, token_text in tokens) + '\n' | n
-}</td>
-## this ugly list comp is necessary for performance
+>
+  ## newline at end is necessary for highlight to work when line is empty
+  ## and for copy pasting code to work as expected
+  <span class="cb-code">${render_tokenstream(tokens)|n}${'\n'}</span>
+</td>
 </tr>
 </%def>
 
@@ -62,7 +62,7 @@
     ${sourceblock.render_annotation_lines(annotation, lines, color_hasher)}
   %endfor
 %else:
-  %for line_num, tokens in c.lines:
+  %for line_num, tokens in enumerate(c.lines, 1):
     ${sourceblock.render_line(line_num, tokens)}
   %endfor
 %endif
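Since filenode_as_lines_tokens now returns a plain list of per-line token lists, the template numbers the lines itself; the equivalent in Python, with placeholder token data:

    # c.lines is a list of token lists, one entry per source line
    lines = [[('n', u'foo')], [('o', u'=')]]
    assert list(enumerate(lines, 1)) == [
        (1, [('n', u'foo')]), (2, [('o', u'=')])]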