diff --git a/rhodecode/lib/codeblocks.py b/rhodecode/lib/codeblocks.py --- a/rhodecode/lib/codeblocks.py +++ b/rhodecode/lib/codeblocks.py @@ -24,6 +24,8 @@ from itertools import groupby from pygments import lex from pygments.formatters.html import _get_ttype_class as pygment_token_class +from pygments.lexers.special import TextLexer, Token + from rhodecode.lib.helpers import ( get_lexer_for_filenode, html_escape, get_custom_lexer) from rhodecode.lib.utils2 import AttributeDict @@ -45,7 +47,7 @@ def filenode_as_lines_tokens(filenode, l log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s', lexer, filenode, org_lexer) tokens = tokenize_string(filenode.content, lexer) - lines = split_token_stream(tokens, split_string='\n') + lines = split_token_stream(tokens) rv = list(lines) return rv @@ -59,22 +61,28 @@ def tokenize_string(content, lexer): lexer.stripall = False lexer.stripnl = False lexer.ensurenl = False - for token_type, token_text in lex(content, lexer): + + if isinstance(lexer, TextLexer): + lexed = [(Token.Text, content)] + else: + lexed = lex(content, lexer) + + for token_type, token_text in lexed: yield pygment_token_class(token_type), token_text -def split_token_stream(tokens, split_string=u'\n'): +def split_token_stream(tokens): """ Take a list of (TokenType, text) tuples and split them by a string - >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')]) + split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')]) [(TEXT, 'some'), (TEXT, 'text'), (TEXT, 'more'), (TEXT, 'text')] """ buffer = [] for token_class, token_text in tokens: - parts = token_text.split(split_string) + parts = token_text.split('\n') for part in parts[:-1]: buffer.append((token_class, part)) yield buffer diff --git a/rhodecode/lib/diffs.py b/rhodecode/lib/diffs.py --- a/rhodecode/lib/diffs.py +++ b/rhodecode/lib/diffs.py @@ -183,13 +183,12 @@ class DiffProcessor(object): :param string: """ - self.cur_diff_size += len(string) if not self.show_full_diff 
and (self.cur_diff_size > self.diff_limit): raise DiffLimitExceeded('Diff Limit Exceeded') - return safe_unicode(string)\ + return string \ .replace('&', '&amp;')\ .replace('<', '&lt;')\ .replace('>', '&gt;') @@ -278,7 +277,7 @@ class DiffProcessor(object): for chunk in self._diff.chunks(): head = chunk.header - diff = imap(self._escaper, chunk.diff.splitlines(1)) + diff = imap(self._escaper, self.diff_splitter(chunk.diff)) raw_diff = chunk.raw limited_diff = False exceeds_limit = False @@ -529,7 +528,8 @@ class DiffProcessor(object): # a real non-binary diff if head['a_file'] or head['b_file']: - diff = iter(chunk.diff.splitlines(1)) + # simulate splitlines, so we keep the line end part + diff = self.diff_splitter(chunk.diff) # append each file to the diff size raw_chunk_size = len(raw_diff) @@ -608,18 +608,17 @@ class DiffProcessor(object): return diff_container(sorted(_files, key=sorter)) # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines - def _parse_lines(self, diff): + def _parse_lines(self, diff_iter): """ Parse the diff an return data for the template. 
""" - lineiter = iter(diff) stats = [0, 0] chunks = [] raw_diff = [] try: - line = lineiter.next() + line = diff_iter.next() while line: raw_diff.append(line) @@ -651,7 +650,7 @@ class DiffProcessor(object): 'line': line, }) - line = lineiter.next() + line = diff_iter.next() while old_line < old_end or new_line < new_end: command = ' ' @@ -686,7 +685,7 @@ class DiffProcessor(object): }) raw_diff.append(line) - line = lineiter.next() + line = diff_iter.next() if self._newline_marker.match(line): # we need to append to lines, since this is not @@ -712,13 +711,12 @@ class DiffProcessor(object): chunks = [] raw_diff = [] - diff_iter = imap(lambda s: safe_unicode(s), diff_iter) - try: line = diff_iter.next() while line: raw_diff.append(line) + # match header e.g @@ -0,0 +1 @@\n' match = self._chunk_re.match(line) if not match: @@ -826,6 +824,32 @@ class DiffProcessor(object): idstring = re.sub(r'(?!-)\W', "", idstring).lower() return idstring + @classmethod + def diff_splitter(cls, string): + """ + Diff split that emulates .splitlines() but works only on \n + """ + if not string: + return + elif string == '\n': + yield u'\n' + else: + + has_newline = string.endswith('\n') + elements = string.split('\n') + if has_newline: + # skip last element as it's empty string from newlines + elements = elements[:-1] + + len_elements = len(elements) + + for cnt, line in enumerate(elements, start=1): + last_line = cnt == len_elements + if last_line and not has_newline: + yield safe_unicode(line) + else: + yield safe_unicode(line) + '\n' + def prepare(self, inline_diff=True): """ Prepare the passed udiff for HTML rendering. 
diff --git a/rhodecode/tests/lib/test_codeblocks.py b/rhodecode/tests/lib/test_codeblocks.py --- a/rhodecode/tests/lib/test_codeblocks.py +++ b/rhodecode/tests/lib/test_codeblocks.py @@ -89,29 +89,9 @@ class TestSplitTokenStream(object): [('type2', u'')], ] - def test_split_token_stream_other_char(self): - lines = list(split_token_stream( - [('type1', 'some\ntext'), ('type2', 'more\n')], - split_string='m')) - - assert lines == [ - [('type1', 'so')], - [('type1', 'e\ntext'), ('type2', '')], - [('type2', 'ore\n')], - ] - - def test_split_token_stream_without_char(self): - lines = list(split_token_stream( - [('type1', 'some\ntext'), ('type2', 'more\n')], - split_string='z')) - - assert lines == [ - [('type1', 'some\ntext'), ('type2', 'more\n')] - ] - def test_split_token_stream_single(self): lines = list(split_token_stream( - [('type1', '\n')], split_string='\n')) + [('type1', '\n')])) assert lines == [ [('type1', '')], @@ -120,7 +100,7 @@ class TestSplitTokenStream(object): def test_split_token_stream_single_repeat(self): lines = list(split_token_stream( - [('type1', '\n\n\n')], split_string='\n')) + [('type1', '\n\n\n')])) assert lines == [ [('type1', '')], @@ -131,7 +111,7 @@ class TestSplitTokenStream(object): def test_split_token_stream_multiple_repeat(self): lines = list(split_token_stream( - [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n')) + [('type1', '\n\n'), ('type2', '\n\n')])) assert lines == [ [('type1', '')], diff --git a/rhodecode/tests/lib/test_diffs.py b/rhodecode/tests/lib/test_diffs.py --- a/rhodecode/tests/lib/test_diffs.py +++ b/rhodecode/tests/lib/test_diffs.py @@ -26,7 +26,7 @@ from rhodecode.lib.diffs import ( DiffProcessor, NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE) -from rhodecode.tests.fixture import Fixture +from rhodecode.tests.fixture import Fixture, no_newline_id_generator from rhodecode.lib.vcs.backends.git.repository import GitDiff from 
rhodecode.lib.vcs.backends.hg.repository import MercurialDiff from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff @@ -162,7 +162,7 @@ def test_diffprocessor_as_html_with_comm assert html == expected_html -class TestMixedFilenameEncodings: +class TestMixedFilenameEncodings(object): @pytest.fixture(scope="class") def raw_diff(self): @@ -811,3 +811,21 @@ def test_diff_lib_newlines(diff_fixture_ data = [(x['filename'], x['operation'], x['stats'], x['raw_diff']) for x in diff_proc_d] assert expected_data == data + + +@pytest.mark.parametrize('input_str', [ + '', + '\n', + '\n\n', + 'First\n+second', + 'First\n+second\n', + + '\n\n\n Multi \n\n\n', + '\n\n\n Multi beginning', + 'Multi end \n\n\n', + 'Multi end', + '@@ -0,0 +1 @@\n+test_content \n\n b\n' +], ids=no_newline_id_generator) +def test_splitlines(input_str): + result = DiffProcessor.diff_splitter(input_str) + assert list(result) == input_str.splitlines(True)