u/ewong/rhodecode-enterprise-ce-fork Files · rhodecode/lib/codeblocks.py

codeblocks: add new code token rendering function that...

codeblocks: add new code token rendering function that supports diff and normal tokens.

dan - - Load All Authors

File last commit:

r1025:8ba7d016 default


                r1025:8ba7d016

default

Download file

             codeblocks.py
        
                    214 lines
            
             | 6.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / codeblocks.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # -*- coding: utf-8 -*-

      # Copyright (C) 2011-2016  RhodeCode GmbH

      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      import logging

      from itertools import groupby

      from pygments import lex

      from pygments.formatters.html import _get_ttype_class as pygment_token_class

      from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape

      from rhodecode.lib.utils2 import AttributeDict

      from rhodecode.lib.vcs.nodes import FileNode

      from pygments.lexers import get_lexer_by_name

      plain_text_lexer = get_lexer_by_name(

          'text', stripall=False, stripnl=False, ensurenl=False)

      log = logging.getLogger()

      def filenode_as_lines_tokens(filenode, lexer=None):

          lexer = lexer or get_lexer_for_filenode(filenode)

          log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)

          tokens = tokenize_string(filenode.content, get_lexer_for_filenode(filenode))

          lines = split_token_stream(tokens, split_string='\n')

          rv = list(lines)

          return rv

      def tokenize_string(content, lexer):

          """

          Use pygments to tokenize some content based on a lexer

          ensuring all original new lines and whitespace is preserved

          """

          lexer.stripall = False

          lexer.stripnl = False

          lexer.ensurenl = False

          for token_type, token_text in lex(content, lexer):

              yield pygment_token_class(token_type), token_text

      def split_token_stream(tokens, split_string=u'\n'):

          """

          Take a list of (TokenType, text) tuples and split them by a string

          >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])

          [(TEXT, 'some'), (TEXT, 'text'),

           (TEXT, 'more'), (TEXT, 'text')]

          """

          buffer = []

          for token_class, token_text in tokens:

              parts = token_text.split(split_string)

              for part in parts[:-1]:

                  buffer.append((token_class, part))

                  yield buffer

                  buffer = []

              buffer.append((token_class, parts[-1]))

          if buffer:

              yield buffer

      def filenode_as_annotated_lines_tokens(filenode):

          """

          Take a file node and return a list of annotations => lines, if no annotation

          is found, it will be None.

          eg:

          [

              (annotation1, [

                  (1, line1_tokens_list),

                  (2, line2_tokens_list),

              ]),

              (annotation2, [

                  (3, line1_tokens_list),

              ]),

              (None, [

                  (4, line1_tokens_list),

              ]),

              (annotation1, [

                  (5, line1_tokens_list),

                  (6, line2_tokens_list),

              ])

          ]

          """

          commit_cache = {} # cache commit_getter lookups

          def _get_annotation(commit_id, commit_getter):

              if commit_id not in commit_cache:

                  commit_cache[commit_id] = commit_getter()

              return commit_cache[commit_id]

          annotation_lookup = {

              line_no: _get_annotation(commit_id, commit_getter)

              for line_no, commit_id, commit_getter, line_content

              in filenode.annotate

          }

          annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)

                                for line_no, tokens

                                in enumerate(filenode_as_lines_tokens(filenode), 1))

          grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

          for annotation, group in grouped_annotations_lines:

              yield (

                  annotation, [(line_no, tokens)

                                for (_, line_no, tokens) in group]

              )

      def render_tokenstream(tokenstream):

          result = []

          for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

              if token_class:

                  result.append(u'<span class="%s">' % token_class)

              else:

                  result.append(u'<span>')

              for op_tag, token_text in token_ops_texts:

                  if op_tag:

                      result.append(u'<%s>' % op_tag)

                  escaped_text = html_escape(token_text)

                  escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')

                  result.append(escaped_text)

                  if op_tag:

                      result.append(u'</%s>' % op_tag)

              result.append(u'</span>')

          html = ''.join(result)

          return html

      def rollup_tokenstream(tokenstream):

          """

          Group a token stream of the format:

              ('class', 'op', 'text')

          or

              ('class', 'text')

          into

              [('class1',

                  [('op1', 'text'),

                   ('op2', 'text')]),

               ('class2',

                  [('op3', 'text')])]

          This is used to get the minimal tags necessary when

          rendering to html eg for a token stream ie.

          <span class="A"><ins>he</ins>llo</span>

          vs

          <span class="A"><ins>he</ins></span><span class="A">llo</span>

          If a 2 tuple is passed in, the output op will be an empty string.

          eg:

          >>> rollup_tokenstream([('classA', '',      'h'),

                                  ('classA', 'del',   'ell'),

                                  ('classA', '',      'o'),

                                  ('classB', '',      ' '),

                                  ('classA', '',      'the'),

                                  ('classA', '',      're'),

                                  ])

          [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],

           ('classB', [('', ' ')],

           ('classA', [('', 'there')]]

          """

          if tokenstream and len(tokenstream[0]) == 2:

              tokenstream = ((t[0], '', t[1]) for t in tokenstream)

          result = []

          for token_class, op_list in groupby(tokenstream, lambda t: t[0]):

              ops = []

              for token_op, token_text_list in groupby(op_list, lambda o: o[1]):

                  text_buffer = []

                  for t_class, t_op, t_text in token_text_list:

                      text_buffer.append(t_text)

                  ops.append((token_op, ''.join(text_buffer)))

              result.append((token_class, ops))

          return result

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# -- coding: utf-8 --

				# Copyright (C) 2011-2016 RhodeCode GmbH
				#
				# This program is free software: you can redistribute it and/or modify
				# it under the terms of the GNU Affero General Public License, version 3
				# (only), as published by the Free Software Foundation.
				#
				# This program is distributed in the hope that it will be useful,
				# but WITHOUT ANY WARRANTY; without even the implied warranty of
				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				# GNU General Public License for more details.
				#
				# You should have received a copy of the GNU Affero General Public License
				# along with this program. If not, see <http://www.gnu.org/licenses/>.
				#
				# This program is dual-licensed. If you wish to learn more about the
				# RhodeCode Enterprise Edition, including its added features, Support services,
				# and proprietary license terms, please see https://rhodecode.com/licenses/

				import logging
				from itertools import groupby

				from pygments import lex
				from pygments.formatters.html import _get_ttype_class as pygment_token_class
				from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
				from rhodecode.lib.utils2 import AttributeDict
				from rhodecode.lib.vcs.nodes import FileNode
				from pygments.lexers import get_lexer_by_name

				plain_text_lexer = get_lexer_by_name(
				'text', stripall=False, stripnl=False, ensurenl=False)


				log = logging.getLogger()


				def filenode_as_lines_tokens(filenode, lexer=None):
				lexer = lexer or get_lexer_for_filenode(filenode)
				log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
				tokens = tokenize_string(filenode.content, get_lexer_for_filenode(filenode))
				lines = split_token_stream(tokens, split_string='\n')
				rv = list(lines)
				return rv


				def tokenize_string(content, lexer):
				"""
				Use pygments to tokenize some content based on a lexer
				ensuring all original new lines and whitespace is preserved
				"""

				lexer.stripall = False
				lexer.stripnl = False
				lexer.ensurenl = False
				for token_type, token_text in lex(content, lexer):
				yield pygment_token_class(token_type), token_text


				def split_token_stream(tokens, split_string=u'\n'):
				"""
				Take a list of (TokenType, text) tuples and split them by a string

				>>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
				[(TEXT, 'some'), (TEXT, 'text'),
				(TEXT, 'more'), (TEXT, 'text')]
				"""

				buffer = []
				for token_class, token_text in tokens:
				parts = token_text.split(split_string)
				for part in parts[:-1]:
				buffer.append((token_class, part))
				yield buffer
				buffer = []

				buffer.append((token_class, parts[-1]))

				if buffer:
				yield buffer


				def filenode_as_annotated_lines_tokens(filenode):
				"""
				Take a file node and return a list of annotations => lines, if no annotation
				is found, it will be None.

				eg:

				[
				(annotation1, [
				(1, line1_tokens_list),
				(2, line2_tokens_list),
				]),
				(annotation2, [
				(3, line1_tokens_list),
				]),
				(None, [
				(4, line1_tokens_list),
				]),
				(annotation1, [
				(5, line1_tokens_list),
				(6, line2_tokens_list),
				])
				]
				"""

				commit_cache = {} # cache commit_getter lookups

				def _get_annotation(commit_id, commit_getter):
				if commit_id not in commit_cache:
				commit_cache[commit_id] = commit_getter()
				return commit_cache[commit_id]

				annotation_lookup = {
				line_no: _get_annotation(commit_id, commit_getter)
				for line_no, commit_id, commit_getter, line_content
				in filenode.annotate
				}

				annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
				for line_no, tokens
				in enumerate(filenode_as_lines_tokens(filenode), 1))

				grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

				for annotation, group in grouped_annotations_lines:
				yield (
				annotation, [(line_no, tokens)
				for (_, line_no, tokens) in group]
				)


				def render_tokenstream(tokenstream):
				result = []
				for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

				if token_class:
				result.append(u'<span class="%s">' % token_class)
				else:
				result.append(u'<span>')

				for op_tag, token_text in token_ops_texts:

				if op_tag:
				result.append(u'<%s>' % op_tag)

				escaped_text = html_escape(token_text)
				escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')

				result.append(escaped_text)

				if op_tag:
				result.append(u'</%s>' % op_tag)

				result.append(u'</span>')

				html = ''.join(result)
				return html


				def rollup_tokenstream(tokenstream):
				"""
				Group a token stream of the format:

				('class', 'op', 'text')
				or
				('class', 'text')

				into

				[('class1',
				[('op1', 'text'),
				('op2', 'text')]),
				('class2',
				[('op3', 'text')])]

				This is used to get the minimal tags necessary when
				rendering to html eg for a token stream ie.

				<span class="A"><ins>he</ins>llo</span>
				vs
				<span class="A"><ins>he</ins></span><span class="A">llo</span>

				If a 2 tuple is passed in, the output op will be an empty string.

				eg:

				>>> rollup_tokenstream([('classA', '', 'h'),
				('classA', 'del', 'ell'),
				('classA', '', 'o'),
				('classB', '', ' '),
				('classA', '', 'the'),
				('classA', '', 're'),
				])

				[('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
				('classB', [('', ' ')],
				('classA', [('', 'there')]]

				"""
				if tokenstream and len(tokenstream[0]) == 2:
				tokenstream = ((t[0], '', t[1]) for t in tokenstream)

				result = []
				for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
				ops = []
				for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
				text_buffer = []
				for t_class, t_op, t_text in token_text_list:
				text_buffer.append(t_text)
				ops.append((token_op, ''.join(text_buffer)))
				result.append((token_class, ops))
				return result