@@ -24,6 +24,8 @@ from itertools import groupby
 
 from pygments import lex
 from pygments.formatters.html import _get_ttype_class as pygment_token_class
+from pygments.lexers.special import TextLexer, Token
+
 from rhodecode.lib.helpers import (
     get_lexer_for_filenode, html_escape, get_custom_lexer)
 from rhodecode.lib.utils2 import AttributeDict
@@ -45,7 +47,7 @@ def filenode_as_lines_tokens(filenode, l
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
     tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens, split_string='\n')
+    lines = split_token_stream(tokens)
     rv = list(lines)
     return rv
 
@@ -59,22 +61,28 @@ def tokenize_string(content, lexer):
     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
-    for token_type, token_text in lex(content, lexer):
+
+    if isinstance(lexer, TextLexer):
+        lexed = [(Token.Text, content)]
+    else:
+        lexed = lex(content, lexer)
+
+    for token_type, token_text in lexed:
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens, split_string='\n'):
+def split_token_stream(tokens):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
-    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')], split_string='\n')
+    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
     [(TEXT, 'some'), (TEXT, 'text'),
      (TEXT, 'more'), (TEXT, 'text')]
     """
 
     buffer = []
     for token_class, token_text in tokens:
-        parts = token_text.split(split_string)
+        parts = token_text.split('\n')
         for part in parts[:-1]:
             buffer.append((token_class, part))
             yield buffer
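
Note on the hunk above: the TextLexer branch skips pygments lexing for plain-text files and emits the whole content as a single Token.Text token, so split_token_stream still receives one uniform stream. A rough sketch of the newline-only grouping (placeholder token classes; the trailing empty token matches the expectations kept in TestSplitTokenStream further down):

    tokens = [('t1', u'some\ntext'), ('t2', u'more\n')]
    list(split_token_stream(tokens))
    # [[('t1', u'some')],
    #  [('t1', u'text'), ('t2', u'more')],
    #  [('t2', u'')]]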
@@ -183,13 +183,12 @@ class DiffProcessor(object):
 
         :param string:
         """
-
         self.cur_diff_size += len(string)
 
         if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
             raise DiffLimitExceeded('Diff Limit Exceeded')
 
-        return safe_unicode(string)\
+        return string \
             .replace('&', '&amp;')\
             .replace('<', '&lt;')\
             .replace('>', '&gt;')
@@ -278,7 +277,7 @@ class DiffProcessor(object):
         for chunk in self._diff.chunks():
             head = chunk.header
 
-            diff = imap(self._escaper, chunk.diff.splitlines(1))
+            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
             raw_diff = chunk.raw
             limited_diff = False
             exceeds_limit = False
@@ -529,7 +528,8 @@ class DiffProcessor(object):
 
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
-                diff = iter(chunk.diff.splitlines(1))
+                # simulate splitlines, so we keep the line end part
+                diff = self.diff_splitter(chunk.diff)
 
                 # append each file to the diff size
                 raw_chunk_size = len(raw_diff)
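
A plausible motivation for dropping splitlines(1) in the two hunks above: Python's splitlines treats form feeds, vertical tabs and (for unicode strings) \x85/\u2028/\u2029 as line breaks, so a diff payload containing one of those characters gets cut mid-line. A hypothetical illustration, assuming the diff_splitter classmethod added further down:

    s = u'payload with \x0c inside\nnext\n'
    s.splitlines(True)
    # [u'payload with \x0c', u' inside\n', u'next\n']   -- split inside the line
    list(DiffProcessor.diff_splitter(s))
    # [u'payload with \x0c inside\n', u'next\n']        -- split only on \n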
@@ -608,18 +608,17 @@ class DiffProcessor(object):
         return diff_container(sorted(_files, key=sorter))
 
     # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
-    def _parse_lines(self, diff):
+    def _parse_lines(self, diff_iter):
         """
         Parse the diff an return data for the template.
         """
 
-        lineiter = iter(diff)
         stats = [0, 0]
         chunks = []
         raw_diff = []
 
         try:
-            line = lineiter.next()
+            line = diff_iter.next()
 
             while line:
                 raw_diff.append(line)
@@ -651,7 +650,7 @@ class DiffProcessor(object):
                             'line': line,
                         })
 
-                line = lineiter.next()
+                line = diff_iter.next()
 
                 while old_line < old_end or new_line < new_end:
                     command = ' '
@@ -686,7 +685,7 @@ class DiffProcessor(object):
                         })
                         raw_diff.append(line)
 
-                        line = lineiter.next()
+                        line = diff_iter.next()
 
                         if self._newline_marker.match(line):
                             # we need to append to lines, since this is not
@@ -712,13 +711,12 @@ class DiffProcessor(object):
         chunks = []
         raw_diff = []
 
-        diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
-
         try:
             line = diff_iter.next()
 
             while line:
                 raw_diff.append(line)
+                # match header e.g @@ -0,0 +1 @@\n'
                 match = self._chunk_re.match(line)
 
                 if not match:
@@ -826,6 +824,32 @@ class DiffProcessor(object):
         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
         return idstring
 
+    @classmethod
+    def diff_splitter(cls, string):
+        """
+        Diff split that emulates .splitlines() but works only on \n
+        """
+        if not string:
+            return
+        elif string == '\n':
+            yield u'\n'
+        else:
+
+            has_newline = string.endswith('\n')
+            elements = string.split('\n')
+            if has_newline:
+                # skip last element as it's empty string from newlines
+                elements = elements[:-1]
+
+            len_elements = len(elements)
+
+            for cnt, line in enumerate(elements, start=1):
+                last_line = cnt == len_elements
+                if last_line and not has_newline:
+                    yield safe_unicode(line)
+                else:
+                    yield safe_unicode(line) + '\n'
+
     def prepare(self, inline_diff=True):
         """
         Prepare the passed udiff for HTML rendering.
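
For reference, a few illustrative calls to the new classmethod, traced from the code above (Python 2 unicode literals, as in the rest of the diff):

    list(DiffProcessor.diff_splitter(u''))        # []
    list(DiffProcessor.diff_splitter(u'\n'))      # [u'\n']
    list(DiffProcessor.diff_splitter(u'a\nb'))    # [u'a\n', u'b']   -- no newline invented for the last line
    list(DiffProcessor.diff_splitter(u'a\nb\n'))  # [u'a\n', u'b\n'] -- line ends preserved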
@@ -89,29 +89,9 @@ class TestSplitTokenStream(object):
             [('type2', u'')],
         ]
 
-    def test_split_token_stream_other_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='m'))
-
-        assert lines == [
-            [('type1', 'so')],
-            [('type1', 'e\ntext'), ('type2', '')],
-            [('type2', 'ore\n')],
-        ]
-
-    def test_split_token_stream_without_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='z'))
-
-        assert lines == [
-            [('type1', 'some\ntext'), ('type2', 'more\n')]
-        ]
-
     def test_split_token_stream_single(self):
         lines = list(split_token_stream(
-            [('type1', '\n')], split_string='\n'))
+            [('type1', '\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -120,7 +100,7 @@ class TestSplitTokenStream(object):
 
     def test_split_token_stream_single_repeat(self):
         lines = list(split_token_stream(
-            [('type1', '\n\n\n')], split_string='\n'))
+            [('type1', '\n\n\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -131,7 +111,7 @@ class TestSplitTokenStream(object):
 
     def test_split_token_stream_multiple_repeat(self):
         lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
+            [('type1', '\n\n'), ('type2', '\n\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -26,7 +26,7 @@ from rhodecode.lib.diffs import (
     DiffProcessor,
     NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
     CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
-from rhodecode.tests.fixture import Fixture
+from rhodecode.tests.fixture import Fixture, no_newline_id_generator
 from rhodecode.lib.vcs.backends.git.repository import GitDiff
 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
@@ -162,7 +162,7 @@ def test_diffprocessor_as_html_with_comm
     assert html == expected_html
 
 
-class TestMixedFilenameEncodings:
+class TestMixedFilenameEncodings(object):
 
     @pytest.fixture(scope="class")
     def raw_diff(self):
@@ -811,3 +811,21 @@ def test_diff_lib_newlines(diff_fixture_
     data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
             for x in diff_proc_d]
     assert expected_data == data
+
+
+@pytest.mark.parametrize('input_str', [
+    '',
+    '\n',
+    '\n\n',
+    'First\n+second',
+    'First\n+second\n',
+
+    '\n\n\n Multi \n\n\n',
+    '\n\n\n Multi beginning',
+    'Multi end \n\n\n',
+    'Multi end',
+    '@@ -0,0 +1 @@\n+test_content \n\n b\n'
+], ids=no_newline_id_generator)
+def test_splitlines(input_str):
+    result = DiffProcessor.diff_splitter(input_str)
+    assert list(result) == input_str.splitlines(True)
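
All parametrized inputs above use plain \n line endings, which is exactly the range where diff_splitter and splitlines(True) agree; for instance:

    u'First\n+second'.splitlines(True)                    # [u'First\n', u'+second']
    list(DiffProcessor.diff_splitter(u'First\n+second'))  # [u'First\n', u'+second']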