codeblocks: add new code token rendering function that...
dan
r1025:8ba7d016 default
@@ -0,0 +1,330 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2016-2016 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21 import pytest
22
23 from rhodecode.lib.codeblocks import (
24 tokenize_string, split_token_stream, rollup_tokenstream,
25 render_tokenstream)
26 from pygments.lexers import get_lexer_by_name
27
28
29 class TestTokenizeString(object):
30
31 python_code = '''
32 import this
33
34 var = 6
35 print "this"
36
37 '''
38
39 def test_tokenize_as_python(self):
40 lexer = get_lexer_by_name('python')
41 tokens = list(tokenize_string(self.python_code, lexer))
42
43 assert tokens == [
44 ('', u'\n'),
45 ('', u' '),
46 ('kn', u'import'),
47 ('', u' '),
48 ('nn', u'this'),
49 ('', u'\n'),
50 ('', u'\n'),
51 ('', u' '),
52 ('n', u'var'),
53 ('', u' '),
54 ('o', u'='),
55 ('', u' '),
56 ('mi', u'6'),
57 ('', u'\n'),
58 ('', u' '),
59 ('k', u'print'),
60 ('', u' '),
61 ('s2', u'"'),
62 ('s2', u'this'),
63 ('s2', u'"'),
64 ('', u'\n'),
65 ('', u'\n'),
66 ('', u' ')
67 ]
68
69 def test_tokenize_as_text(self):
70 lexer = get_lexer_by_name('text')
71 tokens = list(tokenize_string(self.python_code, lexer))
72
73 assert tokens == [
74 ('',
75 u'\n import this\n\n var = 6\n print "this"\n\n ')
76 ]
77
78
79 class TestSplitTokenStream(object):
80
81 def test_split_token_stream(self):
82 lines = list(split_token_stream(
83 [('type1', 'some\ntext'), ('type2', 'more\n')]))
84
85 assert lines == [
86 [('type1', u'some')],
87 [('type1', u'text'), ('type2', u'more')],
88 [('type2', u'')],
89 ]
90
91 def test_split_token_stream_other_char(self):
92 lines = list(split_token_stream(
93 [('type1', 'some\ntext'), ('type2', 'more\n')],
94 split_string='m'))
95
96 assert lines == [
97 [('type1', 'so')],
98 [('type1', 'e\ntext'), ('type2', '')],
99 [('type2', 'ore\n')],
100 ]
101
102 def test_split_token_stream_without_char(self):
103 lines = list(split_token_stream(
104 [('type1', 'some\ntext'), ('type2', 'more\n')],
105 split_string='z'))
106
107 assert lines == [
108 [('type1', 'some\ntext'), ('type2', 'more\n')]
109 ]
110
111 def test_split_token_stream_single(self):
112 lines = list(split_token_stream(
113 [('type1', '\n')], split_string='\n'))
114
115 assert lines == [
116 [('type1', '')],
117 [('type1', '')],
118 ]
119
120 def test_split_token_stream_single_repeat(self):
121 lines = list(split_token_stream(
122 [('type1', '\n\n\n')], split_string='\n'))
123
124 assert lines == [
125 [('type1', '')],
126 [('type1', '')],
127 [('type1', '')],
128 [('type1', '')],
129 ]
130
131 def test_split_token_stream_multiple_repeat(self):
132 lines = list(split_token_stream(
133 [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
134
135 assert lines == [
136 [('type1', '')],
137 [('type1', '')],
138 [('type1', ''), ('type2', '')],
139 [('type2', '')],
140 [('type2', '')],
141 ]
142
143
144 class TestRollupTokens(object):
145
146 @pytest.mark.parametrize('tokenstream,output', [
147 ([],
148 []),
149 ([('A', 'hell'), ('A', 'o')], [
150 ('A', [
151 ('', 'hello')]),
152 ]),
153 ([('A', 'hell'), ('B', 'o')], [
154 ('A', [
155 ('', 'hell')]),
156 ('B', [
157 ('', 'o')]),
158 ]),
159 ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
160 ('A', [
161 ('', 'hello')]),
162 ('B', [
163 ('', ' ')]),
164 ('A', [
165 ('', 'there')]),
166 ]),
167 ])
168 def test_rollup_tokenstream_without_ops(self, tokenstream, output):
169 assert list(rollup_tokenstream(tokenstream)) == output
170
171 @pytest.mark.parametrize('tokenstream,output', [
172 ([],
173 []),
174 ([('A', '', 'hell'), ('A', '', 'o')], [
175 ('A', [
176 ('', 'hello')]),
177 ]),
178 ([('A', '', 'hell'), ('B', '', 'o')], [
179 ('A', [
180 ('', 'hell')]),
181 ('B', [
182 ('', 'o')]),
183 ]),
184 ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
185 ('A', [
186 ('', 'h')]),
187 ('B', [
188 ('', 'e')]),
189 ('C', [
190 ('', 'y')]),
191 ]),
192 ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
193 ('A', [
194 ('', 'he')]),
195 ('C', [
196 ('', 'y')]),
197 ]),
198 ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
199 ('A', [
200 ('ins', 'he')
201 ]),
202 ]),
203 ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
204 ('A', [
205 ('ins', 'h'),
206 ('del', 'e')
207 ]),
208 ]),
209 ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
210 ('A', [
211 ('ins', 'h'),
212 ]),
213 ('B', [
214 ('del', 'ey'),
215 ]),
216 ]),
217 ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
218 ('A', [
219 ('ins', 'h'),
220 ('del', 'e'),
221 ]),
222 ('B', [
223 ('del', 'y'),
224 ]),
225 ]),
226 ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
227 ('A', [
228 ('', 'some'),
229 ('ins', 'new'),
230 ('', 'name'),
231 ]),
232 ]),
233 ])
234 def test_rollup_tokenstream_with_ops(self, tokenstream, output):
235 assert list(rollup_tokenstream(tokenstream)) == output
236
237
238 class TestRenderTokenStream(object):
239
240 @pytest.mark.parametrize('tokenstream,output', [
241 (
242 [],
243 '',
244 ),
245 (
246 [('', '', u'')],
247 '<span></span>',
248 ),
249 (
250 [('', '', u'text')],
251 '<span>text</span>',
252 ),
253 (
254 [('A', '', u'')],
255 '<span class="A"></span>',
256 ),
257 (
258 [('A', '', u'hello')],
259 '<span class="A">hello</span>',
260 ),
261 (
262 [('A', '', u'hel'), ('A', '', u'lo')],
263 '<span class="A">hello</span>',
264 ),
265 (
266 [('A', '', u'two\n'), ('A', '', u'lines')],
267 '<span class="A">two<nl>\n</nl>lines</span>',
268 ),
269 (
270 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
271 '<span class="A"><nl>\n</nl>three<nl>\n</nl>lines</span>',
272 ),
273 (
274 [('', '', u'\n'), ('A', '', u'line')],
275 '<span><nl>\n</nl></span><span class="A">line</span>',
276 ),
277 (
278 [('', 'ins', u'\n'), ('A', '', u'line')],
279 '<span><ins><nl>\n</nl></ins></span><span class="A">line</span>',
280 ),
281 (
282 [('A', '', u'hel'), ('A', 'ins', u'lo')],
283 '<span class="A">hel<ins>lo</ins></span>',
284 ),
285 (
286 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
287 '<span class="A">hel<ins>lo</ins></span>',
288 ),
289 (
290 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
291 '<span class="A">hel<ins>l</ins><del>o</del></span>',
292 ),
293 (
294 [('A', '', u'hel'), ('B', '', u'lo')],
295 '<span class="A">hel</span><span class="B">lo</span>',
296 ),
297 (
298 [('A', '', u'hel'), ('B', 'ins', u'lo')],
299 '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
300 ),
301 ])
302 def test_render_tokenstream_with_ops(self, tokenstream, output):
303 html = render_tokenstream(tokenstream)
304 assert html == output
305
306 @pytest.mark.parametrize('tokenstream,output', [
307 (
308 [('A', u'hel'), ('A', u'lo')],
309 '<span class="A">hello</span>',
310 ),
311 (
312 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
313 '<span class="A">hello</span>',
314 ),
315 (
316 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
317 '<span class="A">hello</span>',
318 ),
319 (
320 [('A', u'hel'), ('B', u'lo')],
321 '<span class="A">hel</span><span class="B">lo</span>',
322 ),
323 (
324 [('A', u'hel'), ('B', u'lo')],
325 '<span class="A">hel</span><span class="B">lo</span>',
326 ),
327 ])
328 def test_render_tokenstream_without_ops(self, tokenstream, output):
329 html = render_tokenstream(tokenstream)
330 assert html == output
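
For orientation, the helpers exercised by these tests compose into a small pipeline: tokenize the content, split the token stream into lines, then render each line to HTML. A minimal sketch of that flow (the sample source and lexer name are illustrative only, not taken from this commit):

    from pygments.lexers import get_lexer_by_name
    from rhodecode.lib.codeblocks import (
        tokenize_string, split_token_stream, render_tokenstream)

    # tokenize a small snippet into (css_class, text) pairs
    lexer = get_lexer_by_name('python')
    tokens = tokenize_string('var = 6\nprint "var"\n', lexer)

    # split the stream on newlines into per-line token lists
    lines = split_token_stream(tokens, split_string='\n')

    # render each line to the <span>-based markup asserted in the tests above
    rendered = [render_tokenstream(line_tokens) for line_tokens in lines]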
@@ -18,16 +18,33 b''
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21
21 import logging
22 22 from itertools import groupby
23 23
24 24 from pygments import lex
25 # PYGMENTS_TOKEN_TYPES is used in a hot loop keep attribute lookups to a minimum
26 from pygments.token import STANDARD_TYPES as PYGMENTS_TOKEN_TYPES
25 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
27 from rhodecode.lib.utils2 import AttributeDict
28 from rhodecode.lib.vcs.nodes import FileNode
29 from pygments.lexers import get_lexer_by_name
30
31 plain_text_lexer = get_lexer_by_name(
32 'text', stripall=False, stripnl=False, ensurenl=False)
33
34
35 log = logging.getLogger()
27 36
28 from rhodecode.lib.helpers import get_lexer_for_filenode
29 37
30 def tokenize_file(content, lexer):
38 def filenode_as_lines_tokens(filenode, lexer=None):
39 lexer = lexer or get_lexer_for_filenode(filenode)
40 log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
41 tokens = tokenize_string(filenode.content, lexer)
42 lines = split_token_stream(tokens, split_string='\n')
43 rv = list(lines)
44 return rv
45
46
47 def tokenize_string(content, lexer):
31 48 """
32 49 Use pygments to tokenize some content based on a lexer
33 50 ensuring all original new lines and whitespace is preserved
@@ -36,65 +53,33 b' def tokenize_file(content, lexer):'
36 53 lexer.stripall = False
37 54 lexer.stripnl = False
38 55 lexer.ensurenl = False
39 return lex(content, lexer)
56 for token_type, token_text in lex(content, lexer):
57 yield pygment_token_class(token_type), token_text
40 58
41 59
42 def pygment_token_class(token_type):
43 """ Convert a pygments token type to html class name """
44
45 fname = PYGMENTS_TOKEN_TYPES.get(token_type)
46 if fname:
47 return fname
48
49 aname = ''
50 while fname is None:
51 aname = '-' + token_type[-1] + aname
52 token_type = token_type.parent
53 fname = PYGMENTS_TOKEN_TYPES.get(token_type)
54
55 return fname + aname
56
57
58 def tokens_as_lines(tokens, split_string=u'\n'):
60 def split_token_stream(tokens, split_string=u'\n'):
59 61 """
60 62 Take a list of (TokenType, text) tuples and split them by a string
61 63
62 eg. [(TEXT, 'some\ntext')] => [(TEXT, 'some'), (TEXT, 'text')]
64 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
65 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
66 [(TEXT, '')]]
63 67 """
64 68
65 69 buffer = []
66 for token_type, token_text in tokens:
70 for token_class, token_text in tokens:
67 71 parts = token_text.split(split_string)
68 72 for part in parts[:-1]:
69 buffer.append((token_type, part))
73 buffer.append((token_class, part))
70 74 yield buffer
71 75 buffer = []
72 76
73 buffer.append((token_type, parts[-1]))
77 buffer.append((token_class, parts[-1]))
74 78
75 79 if buffer:
76 80 yield buffer
77 81
78 82
79 def filenode_as_lines_tokens(filenode):
80 """
81 Return a generator of lines with pygment tokens for a filenode eg:
82
83 [
84 (1, line1_tokens_list),
85 (2, line1_tokens_list]),
86 ]
87 """
88
89 return enumerate(
90 tokens_as_lines(
91 tokenize_file(
92 filenode.content, get_lexer_for_filenode(filenode)
93 )
94 ),
95 1)
96
97
98 83 def filenode_as_annotated_lines_tokens(filenode):
99 84 """
100 85 Take a file node and return a list of annotations => lines, if no annotation
@@ -120,9 +105,8 b' def filenode_as_annotated_lines_tokens(f'
120 105 ]
121 106 """
122 107
108 commit_cache = {} # cache commit_getter lookups
123 109
124 # cache commit_getter lookups
125 commit_cache = {}
126 110 def _get_annotation(commit_id, commit_getter):
127 111 if commit_id not in commit_cache:
128 112 commit_cache[commit_id] = commit_getter()
@@ -136,7 +120,7 b' def filenode_as_annotated_lines_tokens(f'
136 120
137 121 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
138 122 for line_no, tokens
139 in filenode_as_lines_tokens(filenode))
123 in enumerate(filenode_as_lines_tokens(filenode), 1))
140 124
141 125 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
142 126
@@ -145,3 +129,86 b' def filenode_as_annotated_lines_tokens(f'
145 129 annotation, [(line_no, tokens)
146 130 for (_, line_no, tokens) in group]
147 131 )
132
133
134 def render_tokenstream(tokenstream):
135 result = []
136 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
137
138 if token_class:
139 result.append(u'<span class="%s">' % token_class)
140 else:
141 result.append(u'<span>')
142
143 for op_tag, token_text in token_ops_texts:
144
145 if op_tag:
146 result.append(u'<%s>' % op_tag)
147
148 escaped_text = html_escape(token_text)
149 escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
150
151 result.append(escaped_text)
152
153 if op_tag:
154 result.append(u'</%s>' % op_tag)
155
156 result.append(u'</span>')
157
158 html = ''.join(result)
159 return html
160
161
162 def rollup_tokenstream(tokenstream):
163 """
164 Group a token stream of the format:
165
166 ('class', 'op', 'text')
167 or
168 ('class', 'text')
169
170 into
171
172 [('class1',
173 [('op1', 'text'),
174 ('op2', 'text')]),
175 ('class2',
176 [('op3', 'text')])]
177
178 This is used to produce the minimal tags necessary when
179 rendering to html, e.g. for a token stream:
180
181 <span class="A"><ins>he</ins>llo</span>
182 vs
183 <span class="A"><ins>he</ins></span><span class="A">llo</span>
184
185 If a 2-tuple is passed in, the output op will be an empty string.
186
187 eg:
188
189 >>> rollup_tokenstream([('classA', '', 'h'),
190 ('classA', 'del', 'ell'),
191 ('classA', '', 'o'),
192 ('classB', '', ' '),
193 ('classA', '', 'the'),
194 ('classA', '', 're'),
195 ])
196
197 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
198 ('classB', [('', ' ')]),
199 ('classA', [('', 'there')])]
200
201 """
202 if tokenstream and len(tokenstream[0]) == 2:
203 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
204
205 result = []
206 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
207 ops = []
208 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
209 text_buffer = []
210 for t_class, t_op, t_text in token_text_list:
211 text_buffer.append(t_text)
212 ops.append((token_op, ''.join(text_buffer)))
213 result.append((token_class, ops))
214 return result
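
To make the "minimal tags" point above concrete: adjacent tokens that share both a class and an op are coalesced into a single span and a single op tag, rather than one element per token. The expected value below is taken directly from the new tests in this commit:

    from rhodecode.lib.codeblocks import render_tokenstream

    tokens = [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')]
    # the two 'ins' fragments collapse into one <ins> inside one <span class="A">
    assert render_tokenstream(tokens) == '<span class="A">hel<ins>lo</ins></span>'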
@@ -644,6 +644,9 b' pre.literal-block, .codehilite pre{'
644 644
645 645 /* START NEW CODE BLOCK CSS */
646 646
647 @cb-line-height: 18px;
648 @cb-line-code-padding: 10px;
649
647 650 table.cb {
648 651 width: 100%;
649 652 border-collapse: collapse;
@@ -678,21 +681,23 b' table.cb {'
678 681
679 682 td {
680 683 vertical-align: top;
681 padding: 2px 10px;
684 padding: 0;
682 685
683 686 &.cb-content {
687 font-size: 12.35px;
688
689 span.cb-code {
690 line-height: @cb-line-height;
691 padding-left: @cb-line-code-padding;
692 display: block;
684 693 white-space: pre-wrap;
685 694 font-family: @font-family-monospace;
686 font-size: 12.35px;
687
688 span {
689 695 word-break: break-word;
690 696 }
691 697 }
692 698
693 699 &.cb-lineno {
694 700 padding: 0;
695 height: 1px; /* this allows the <a> link to fill to 100% height of the td */
696 701 width: 50px;
697 702 color: rgba(0, 0, 0, 0.3);
698 703 text-align: right;
@@ -702,21 +707,20 b' table.cb {'
702 707 a::before {
703 708 content: attr(data-line-no);
704 709 }
705 &.cb-line-selected {
710 &.cb-line-selected a {
706 711 background: @comment-highlight-color !important;
707 712 }
708 713
709 714 a {
710 715 display: block;
711 height: 100%;
716 padding-right: @cb-line-code-padding;
717 line-height: @cb-line-height;
712 718 color: rgba(0, 0, 0, 0.3);
713 padding: 0 10px; /* vertical padding is 0 so that height: 100% works */
714 line-height: 18px; /* use this instead of vertical padding */
715 719 }
716 720 }
717 721
718 722 &.cb-content {
719 &.cb-line-selected {
723 &.cb-line-selected .cb-code {
720 724 background: @comment-highlight-color !important;
721 725 }
722 726 }
@@ -2,9 +2,9 b''
2 2 annotation=None,
3 3 bgcolor=None)">
4 4 <%
5 # avoid module lookups for performance
6 from rhodecode.lib.codeblocks import pygment_token_class
7 from rhodecode.lib.helpers import html_escape
5 from rhodecode.lib.codeblocks import render_tokenstream
6 # avoid module lookup for performance
7 html_escape = h.html_escape
8 8 %>
9 9 <tr class="cb-line cb-line-fresh"
10 10 %if annotation:
@@ -18,13 +18,11 b''
18 18 %if bgcolor:
19 19 style="background: ${bgcolor}"
20 20 %endif
21 >${
22 ''.join(
23 '<span class="%s">%s</span>' %
24 (pygment_token_class(token_type), html_escape(token_text))
25 for token_type, token_text in tokens) + '\n' | n
26 }</td>
27 ## this ugly list comp is necessary for performance
21 >
22 ## newline at end is necessary for highlight to work when line is empty
23 ## and for copy pasting code to work as expected
24 <span class="cb-code">${render_tokenstream(tokens)|n}${'\n'}</span>
25 </td>
28 26 </tr>
29 27 </%def>
30 28
@@ -62,7 +62,7 b''
62 62 ${sourceblock.render_annotation_lines(annotation, lines, color_hasher)}
63 63 %endfor
64 64 %else:
65 %for line_num, tokens in c.lines:
65 %for line_num, tokens in enumerate(c.lines, 1):
66 66 ${sourceblock.render_line(line_num, tokens)}
67 67 %endfor
68 68 %endif
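
With the template change above, line numbers are now added at render time via enumerate, so c.lines only needs to hold the per-line token lists returned by filenode_as_lines_tokens. A hypothetical controller-side sketch (the variable wiring is assumed, not shown in this commit):

    from rhodecode.lib.codeblocks import filenode_as_lines_tokens

    # filenode is the file being viewed; c is the Mako template context
    c.lines = filenode_as_lines_tokens(filenode)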