@@ -24,6 +24,8 @@ from itertools import groupby
24 | 24 |
25 | 25 | from pygments import lex
26 | 26 | from pygments.formatters.html import _get_ttype_class as pygment_token_class
   | 27 | from pygments.lexers.special import TextLexer, Token
   | 28 |
27 | 29 | from rhodecode.lib.helpers import (
28 | 30 |     get_lexer_for_filenode, html_escape, get_custom_lexer)
29 | 31 | from rhodecode.lib.utils2 import AttributeDict
@@ -45,7 +47,7 @@ def filenode_as_lines_tokens(filenode, l
45 | 47 |     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
46 | 48 |               lexer, filenode, org_lexer)
47 | 49 |     tokens = tokenize_string(filenode.content, lexer)
48 |    |     lines = split_token_stream(tokens, split_string='\n')
   | 50 |     lines = split_token_stream(tokens)
49 | 51 |     rv = list(lines)
50 | 52 |     return rv
51 | 53 |
@@ -59,22 +61,28 @@ def tokenize_string(content, lexer):
59 | 61 |     lexer.stripall = False
60 | 62 |     lexer.stripnl = False
61 | 63 |     lexer.ensurenl = False
62 |    |     for token_type, token_text in lex(content, lexer):
   | 64 |
   | 65 |     if isinstance(lexer, TextLexer):
   | 66 |         lexed = [(Token.Text, content)]
   | 67 |     else:
   | 68 |         lexed = lex(content, lexer)
   | 69 |
   | 70 |     for token_type, token_text in lexed:
63 | 71 |         yield pygment_token_class(token_type), token_text
64 | 72 |
65 | 73 |
66 |    | def split_token_stream(tokens, split_string='\n'):
   | 74 | def split_token_stream(tokens):
67 | 75 |     """
68 | 76 |     Take a list of (TokenType, text) tuples and split them by a string
69 | 77 |
70 |    |     split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')], split_string='\n')
   | 78 |     split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
71 | 79 |     [(TEXT, 'some'), (TEXT, 'text'),
72 | 80 |      (TEXT, 'more'), (TEXT, 'text')]
73 | 81 |     """
74 | 82 |
75 | 83 |     buffer = []
76 | 84 |     for token_class, token_text in tokens:
77 |    |         parts = token_text.split(split_string)
   | 85 |         parts = token_text.split('\n')
78 | 86 |         for part in parts[:-1]:
79 | 87 |             buffer.append((token_class, part))
80 | 88 |             yield buffer
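For reviewers skimming this hunk, here is a minimal runnable sketch of the reworked helper as it reads after the change; the tail past the diff cut (the final buffer flush) is inferred from the tests later in this review, so treat it as an approximation rather than the authoritative source:

def split_token_stream(tokens):
    # accumulate (token_class, text) pairs until a '\n' boundary is hit
    buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            buffer.append((token_class, part))
            yield buffer   # one buffered list per rendered line
            buffer = []
        buffer.append((token_class, parts[-1]))
    if buffer:             # inferred: flush the trailing partial line
        yield buffer

print(list(split_token_stream([('txt', 'some\ntext'), ('txt', 'more\n')])))
# [[('txt', 'some')], [('txt', 'text'), ('txt', 'more')], [('txt', '')]]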
@@ -183,13 +183,12 @@ class DiffProcessor(object):
183 | 183 |
184 | 184 |         :param string:
185 | 185 |         """
186 |     |
187 | 186 |         self.cur_diff_size += len(string)
188 | 187 |
189 | 188 |         if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
190 | 189 |             raise DiffLimitExceeded('Diff Limit Exceeded')
191 | 190 |
192 |     |         return safe_unicode(string)\
    | 191 |         return string \
193 | 192 |             .replace('&', '&amp;')\
194 | 193 |             .replace('<', '&lt;')\
195 | 194 |             .replace('>', '&gt;')
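One detail worth keeping in mind about this replace chain: '&' must be escaped before '<' and '>', otherwise the ampersands introduced by '&lt;'/'&gt;' would themselves be escaped a second time. A quick illustration:

s = '<b & c>'
print(s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
# &lt;b &amp; c&gt;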
@@ -278,7 +277,7 @@ class DiffProcessor(object):
278 | 277 |         for chunk in self._diff.chunks():
279 | 278 |             head = chunk.header
280 | 279 |
281 |     |             diff = imap(self._escaper, chunk.diff.splitlines(1))
    | 280 |             diff = imap(self._escaper, self.diff_splitter(chunk.diff))
282 | 281 |             raw_diff = chunk.raw
283 | 282 |             limited_diff = False
284 | 283 |             exceeds_limit = False
@@ -529,7 +528,8 @@ class DiffProcessor(object):
529 | 528 |
530 | 529 |             # a real non-binary diff
531 | 530 |             if head['a_file'] or head['b_file']:
532 |     |                 diff = iter(chunk.diff.splitlines(1))
    | 531 |                 # simulate splitlines, so we keep the line end part
    | 532 |                 diff = self.diff_splitter(chunk.diff)
533 | 533 |
534 | 534 |                 # append each file to the diff size
535 | 535 |                 raw_chunk_size = len(raw_diff)
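The new comment states the intent: str.splitlines() treats '\r' (and several other characters) as line boundaries, which can split a diff line that legitimately contains a carriage return, while diff_splitter honours only '\n'. An illustration of the difference, using a made-up payload:

text = 'payload with \r inside\n'
print(text.splitlines(True))  # ['payload with \r', ' inside\n'] -- split on the stray '\r'
# DiffProcessor.diff_splitter(text) would yield just [u'payload with \r inside\n']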
@@ -608,18 +608,17 @@ class DiffProcessor(object):
608 | 608 |         return diff_container(sorted(_files, key=sorter))
609 | 609 |
610 | 610 |     # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
611 |     |     def _parse_lines(self, diff):
    | 611 |     def _parse_lines(self, diff_iter):
612 | 612 |         """
613 | 613 |         Parse the diff an return data for the template.
614 | 614 |         """
615 | 615 |
616 |     |         lineiter = iter(diff)
617 | 616 |         stats = [0, 0]
618 | 617 |         chunks = []
619 | 618 |         raw_diff = []
620 | 619 |
621 | 620 |         try:
622 |     |             line = lineiter.next()
    | 621 |             line = diff_iter.next()
623 | 622 |
624 | 623 |             while line:
625 | 624 |                 raw_diff.append(line)
@@ -651,7 +650,7 @@ class DiffProcessor(object):
651 | 650 |                             'line': line,
652 | 651 |                         })
653 | 652 |
654 |     |                 line = lineiter.next()
    | 653 |                 line = diff_iter.next()
655 | 654 |
656 | 655 |                 while old_line < old_end or new_line < new_end:
657 | 656 |                     command = ' '
@@ -686,7 +685,7 @@ class DiffProcessor(object):
686 | 685 |                         })
687 | 686 |                         raw_diff.append(line)
688 | 687 |
689 |     |                         line = lineiter.next()
    | 688 |                         line = diff_iter.next()
690 | 689 |
691 | 690 |                         if self._newline_marker.match(line):
692 | 691 |                             # we need to append to lines, since this is not
@@ -712,13 +711,12 @@ class DiffProcessor(object):
712 | 711 |         chunks = []
713 | 712 |         raw_diff = []
714 | 713 |
715 |     |         diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
716 |     |
717 | 714 |         try:
718 | 715 |             line = diff_iter.next()
719 | 716 |
720 | 717 |             while line:
721 | 718 |                 raw_diff.append(line)
    | 719 |                 # match header e.g @@ -0,0 +1 @@\n'
722 | 720 |                 match = self._chunk_re.match(line)
723 | 721 |
724 | 722 |                 if not match:
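The added comment documents the header shape that _chunk_re is expected to match. The pattern itself is defined outside this hunk; the regex below is only an illustrative guess at a typical hunk-header pattern, not necessarily the exact one in this codebase:

import re

# illustrative pattern (assumption): old start/len, new start/len, trailer
chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?(.*)')
print(chunk_re.match('@@ -0,0 +1 @@\n').groups())
# ('0', '0', '1', None, ' @@')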
@@ -826,6 +824,32 @@ class DiffProcessor(object):
826 | 824 |         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 | 825 |         return idstring
828 | 826 |
    | 827 |     @classmethod
    | 828 |     def diff_splitter(cls, string):
    | 829 |         """
    | 830 |         Diff split that emulates .splitlines() but works only on \n
    | 831 |         """
    | 832 |         if not string:
    | 833 |             return
    | 834 |         elif string == '\n':
    | 835 |             yield u'\n'
    | 836 |         else:
    | 837 |
    | 838 |             has_newline = string.endswith('\n')
    | 839 |             elements = string.split('\n')
    | 840 |             if has_newline:
    | 841 |                 # skip last element as it's empty string from newlines
    | 842 |                 elements = elements[:-1]
    | 843 |
    | 844 |             len_elements = len(elements)
    | 845 |
    | 846 |             for cnt, line in enumerate(elements, start=1):
    | 847 |                 last_line = cnt == len_elements
    | 848 |                 if last_line and not has_newline:
    | 849 |                     yield safe_unicode(line)
    | 850 |                 else:
    | 851 |                     yield safe_unicode(line) + '\n'
    | 852 |
829 | 853 |     def prepare(self, inline_diff=True):
830 | 854 |         """
831 | 855 |         Prepare the passed udiff for HTML rendering.
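Behaviourally, the new classmethod is meant as a drop-in for splitlines(True) on '\n'-terminated text, with unicode output; the parametrized test at the end of this review asserts exactly that equivalence. For example:

list(DiffProcessor.diff_splitter('First\n+second'))
# [u'First\n', u'+second']  -- same as 'First\n+second'.splitlines(True)
list(DiffProcessor.diff_splitter(''))
# []                        -- an empty diff yields nothing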
@@ -89,29 +89,9 @@ class TestSplitTokenStream(object):
 89 |  89 |             [('type2', u'')],
 90 |  90 |         ]
 91 |  91 |
 92 |     |     def test_split_token_stream_other_char(self):
 93 |     |         lines = list(split_token_stream(
 94 |     |             [('type1', 'some\ntext'), ('type2', 'more\n')],
 95 |     |             split_string='m'))
 96 |     |
 97 |     |         assert lines == [
 98 |     |             [('type1', 'so')],
 99 |     |             [('type1', 'e\ntext'), ('type2', '')],
100 |     |             [('type2', 'ore\n')],
101 |     |         ]
102 |     |
103 |     |     def test_split_token_stream_without_char(self):
104 |     |         lines = list(split_token_stream(
105 |     |             [('type1', 'some\ntext'), ('type2', 'more\n')],
106 |     |             split_string='z'))
107 |     |
108 |     |         assert lines == [
109 |     |             [('type1', 'some\ntext'), ('type2', 'more\n')]
110 |     |         ]
111 |     |
112 |  92 |     def test_split_token_stream_single(self):
113 |  93 |         lines = list(split_token_stream(
114 |     |             [('type1', '\n')], split_string='\n'))
    |  94 |             [('type1', '\n')]))
115 |  95 |
116 |  96 |         assert lines == [
117 |  97 |             [('type1', '')],
@@ -120,7 +100,7 @@ class TestSplitTokenStream(object):
120 | 100 |
121 | 101 |     def test_split_token_stream_single_repeat(self):
122 | 102 |         lines = list(split_token_stream(
123 |     |             [('type1', '\n\n\n')], split_string='\n'))
    | 103 |             [('type1', '\n\n\n')]))
124 | 104 |
125 | 105 |         assert lines == [
126 | 106 |             [('type1', '')],
@@ -131,7 +111,7 @@ class TestSplitTokenStream(object):
131 | 111 |
132 | 112 |     def test_split_token_stream_multiple_repeat(self):
133 | 113 |         lines = list(split_token_stream(
134 |     |             [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
    | 114 |             [('type1', '\n\n'), ('type2', '\n\n')]))
135 | 115 |
136 | 116 |         assert lines == [
137 | 117 |             [('type1', '')],
@@ -26,7 +26,7 @@ from rhodecode.lib.diffs import (
26 | 26 |     DiffProcessor,
27 | 27 |     NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
28 | 28 |     CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
29 |    | from rhodecode.tests.fixture import Fixture
   | 29 | from rhodecode.tests.fixture import Fixture, no_newline_id_generator
30 | 30 | from rhodecode.lib.vcs.backends.git.repository import GitDiff
31 | 31 | from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
32 | 32 | from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
@@ -162,7 +162,7 @@ def test_diffprocessor_as_html_with_comm
162 | 162 |     assert html == expected_html
163 | 163 |
164 | 164 |
165 |     | class TestMixedFilenameEncodings:
    | 165 | class TestMixedFilenameEncodings(object):
166 | 166 |
167 | 167 |     @pytest.fixture(scope="class")
168 | 168 |     def raw_diff(self):
@@ -811,3 +811,21 @@ def test_diff_lib_newlines(diff_fixture_
811 | 811 |     data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
812 | 812 |             for x in diff_proc_d]
813 | 813 |     assert expected_data == data
    | 814 |
    | 815 |
    | 816 | @pytest.mark.parametrize('input_str', [
    | 817 |     '',
    | 818 |     '\n',
    | 819 |     '\n\n',
    | 820 |     'First\n+second',
    | 821 |     'First\n+second\n',
    | 822 |
    | 823 |     '\n\n\n Multi \n\n\n',
    | 824 |     '\n\n\n Multi beginning',
    | 825 |     'Multi end \n\n\n',
    | 826 |     'Multi end',
    | 827 |     '@@ -0,0 +1 @@\n+test_content \n\n b\n'
    | 828 | ], ids=no_newline_id_generator)
    | 829 | def test_splitlines(input_str):
    | 830 |     result = DiffProcessor.diff_splitter(input_str)
    | 831 |     assert list(result) == input_str.splitlines(True)