@@ -24,6 +24,8 @@ from itertools import groupby
 
 from pygments import lex
 from pygments.formatters.html import _get_ttype_class as pygment_token_class
+from pygments.lexers.special import TextLexer, Token
+
 from rhodecode.lib.helpers import (
     get_lexer_for_filenode, html_escape, get_custom_lexer)
 from rhodecode.lib.utils2 import AttributeDict
@@ -45,7 +47,7 @@ def filenode_as_lines_tokens(filenode, l
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
     tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens, split_string='\n')
+    lines = split_token_stream(tokens)
     rv = list(lines)
     return rv
 
@@ -59,22 +61,28 @@ def tokenize_string(content, lexer):
     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
-    for token_type, token_text in lex(content, lexer):
+
+    if isinstance(lexer, TextLexer):
+        lexed = [(Token.Text, content)]
+    else:
+        lexed = lex(content, lexer)
+
+    for token_type, token_text in lexed:
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens, split_string='\n'):
+def split_token_stream(tokens):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
-    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')], split_string='\n')
+    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
     [(TEXT, 'some'), (TEXT, 'text'),
      (TEXT, 'more'), (TEXT, 'text')]
     """
 
     buffer = []
     for token_class, token_text in tokens:
-        parts = token_text.split(split_string)
+        parts = token_text.split('\n')
         for part in parts[:-1]:
             buffer.append((token_class, part))
             yield buffer
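
Note on the hunk above: the TextLexer branch skips pygments lexing for plain-text files and emits the whole content as a single Token.Text token, so split_token_stream still receives one uniform stream. A rough sketch of the newline-only grouping (placeholder token classes; the trailing empty token matches the expectations kept in TestSplitTokenStream further down):

    tokens = [('t1', u'some\ntext'), ('t2', u'more\n')]
    list(split_token_stream(tokens))
    # [[('t1', u'some')],
    #  [('t1', u'text'), ('t2', u'more')],
    #  [('t2', u'')]]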
@@ -183,13 +183,12 @@ class DiffProcessor(object):
 
         :param string:
         """
-
         self.cur_diff_size += len(string)
 
         if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
             raise DiffLimitExceeded('Diff Limit Exceeded')
 
-        return safe_unicode(string)\
+        return string \
             .replace('&', '&amp;')\
             .replace('<', '&lt;')\
             .replace('>', '&gt;')
@@ -278,7 +277,7 @@ class DiffProcessor(object):
         for chunk in self._diff.chunks():
             head = chunk.header
 
-            diff = imap(self._escaper, chunk.diff.splitlines(1))
+            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
             raw_diff = chunk.raw
             limited_diff = False
             exceeds_limit = False
@@ -529,7 +528,8 @@ class DiffProcessor(object):
 
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
-                diff = iter(chunk.diff.splitlines(1))
+                # simulate splitlines, so we keep the line end part
+                diff = self.diff_splitter(chunk.diff)
 
                 # append each file to the diff size
                 raw_chunk_size = len(raw_diff)
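
A plausible motivation for dropping splitlines(1) in the two hunks above: Python's splitlines treats form feeds, vertical tabs and (for unicode strings) \x85/\u2028/\u2029 as line breaks, so a diff payload containing one of those characters gets cut mid-line. A hypothetical illustration, assuming the diff_splitter classmethod added further down:

    s = u'payload with \x0c inside\nnext\n'
    s.splitlines(True)
    # [u'payload with \x0c', u' inside\n', u'next\n']   -- split inside the line
    list(DiffProcessor.diff_splitter(s))
    # [u'payload with \x0c inside\n', u'next\n']        -- split only on \n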
@@ -608,18 +608,17 @@ class DiffProcessor(object):
         return diff_container(sorted(_files, key=sorter))
 
     # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
-    def _parse_lines(self, diff):
+    def _parse_lines(self, diff_iter):
         """
         Parse the diff an return data for the template.
         """
 
-        lineiter = iter(diff)
         stats = [0, 0]
         chunks = []
         raw_diff = []
 
         try:
-            line = lineiter.next()
+            line = diff_iter.next()
 
             while line:
                 raw_diff.append(line)
@@ -651,7 +650,7 @@ class DiffProcessor(object):
                             'line': line,
                         })
 
-                line = lineiter.next()
+                line = diff_iter.next()
 
                 while old_line < old_end or new_line < new_end:
                     command = ' '
@@ -686,7 +685,7 @@ class DiffProcessor(object):
                         })
                         raw_diff.append(line)
 
-                        line = lineiter.next()
+                        line = diff_iter.next()
 
                         if self._newline_marker.match(line):
                             # we need to append to lines, since this is not
@@ -712,13 +711,12 @@ class DiffProcessor(object):
         chunks = []
         raw_diff = []
 
-        diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
-
         try:
             line = diff_iter.next()
 
             while line:
                 raw_diff.append(line)
+                # match header e.g @@ -0,0 +1 @@\n'
                 match = self._chunk_re.match(line)
 
                 if not match:
@@ -826,6 +824,32 @@ class DiffProcessor(object):
         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
         return idstring
 
+    @classmethod
+    def diff_splitter(cls, string):
+        """
+        Diff split that emulates .splitlines() but works only on \n
+        """
+        if not string:
+            return
+        elif string == '\n':
+            yield u'\n'
+        else:
+
+            has_newline = string.endswith('\n')
+            elements = string.split('\n')
+            if has_newline:
+                # skip last element as it's empty string from newlines
+                elements = elements[:-1]
+
+            len_elements = len(elements)
+
+            for cnt, line in enumerate(elements, start=1):
+                last_line = cnt == len_elements
+                if last_line and not has_newline:
+                    yield safe_unicode(line)
+                else:
+                    yield safe_unicode(line) + '\n'
+
     def prepare(self, inline_diff=True):
         """
         Prepare the passed udiff for HTML rendering.
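
For reference, a few illustrative calls to the new classmethod, traced from the code above (Python 2 unicode literals, as in the rest of the diff):

    list(DiffProcessor.diff_splitter(u''))        # []
    list(DiffProcessor.diff_splitter(u'\n'))      # [u'\n']
    list(DiffProcessor.diff_splitter(u'a\nb'))    # [u'a\n', u'b']   -- no newline invented for the last line
    list(DiffProcessor.diff_splitter(u'a\nb\n'))  # [u'a\n', u'b\n'] -- line ends preserved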
@@ -89,29 +89,9 @@ class TestSplitTokenStream(object):
             [('type2', u'')],
         ]
 
-    def test_split_token_stream_other_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='m'))
-
-        assert lines == [
-            [('type1', 'so')],
-            [('type1', 'e\ntext'), ('type2', '')],
-            [('type2', 'ore\n')],
-        ]
-
-    def test_split_token_stream_without_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='z'))
-
-        assert lines == [
-            [('type1', 'some\ntext'), ('type2', 'more\n')]
-        ]
-
     def test_split_token_stream_single(self):
         lines = list(split_token_stream(
-            [('type1', '\n')], split_string='\n'))
+            [('type1', '\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -120,7 +100,7 @@ class TestSplitTokenStream(object):
 
     def test_split_token_stream_single_repeat(self):
         lines = list(split_token_stream(
-            [('type1', '\n\n\n')], split_string='\n'))
+            [('type1', '\n\n\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -131,7 +111,7 @@ class TestSplitTokenStream(object):
 
     def test_split_token_stream_multiple_repeat(self):
         lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
+            [('type1', '\n\n'), ('type2', '\n\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -26,7 +26,7 @@ from rhodecode.lib.diffs import (
     DiffProcessor,
     NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
     CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
-from rhodecode.tests.fixture import Fixture
+from rhodecode.tests.fixture import Fixture, no_newline_id_generator
 from rhodecode.lib.vcs.backends.git.repository import GitDiff
 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
@@ -162,7 +162,7 @@ def test_diffprocessor_as_html_with_comm
     assert html == expected_html
 
 
-class TestMixedFilenameEncodings:
+class TestMixedFilenameEncodings(object):
 
     @pytest.fixture(scope="class")
     def raw_diff(self):
@@ -811,3 +811,21 @@ def test_diff_lib_newlines(diff_fixture_
     data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
             for x in diff_proc_d]
     assert expected_data == data
+
+
+@pytest.mark.parametrize('input_str', [
+    '',
+    '\n',
+    '\n\n',
+    'First\n+second',
+    'First\n+second\n',
+
+    '\n\n\n Multi \n\n\n',
+    '\n\n\n Multi beginning',
+    'Multi end \n\n\n',
+    'Multi end',
+    '@@ -0,0 +1 @@\n+test_content \n\n b\n'
+], ids=no_newline_id_generator)
+def test_splitlines(input_str):
+    result = DiffProcessor.diff_splitter(input_str)
+    assert list(result) == input_str.splitlines(True)
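
All parametrized inputs above use plain \n line endings, which is exactly the range where diff_splitter and splitlines(True) agree; for instance:

    u'First\n+second'.splitlines(True)                    # [u'First\n', u'+second']
    list(DiffProcessor.diff_splitter(u'First\n+second'))  # [u'First\n', u'+second']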