diffs: in case of text lexers don't do any HL because of pygments newline...
marcink
r2546:db577a02 stable
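Context for the changes below: pygments by default appends a trailing newline to lexed input (the `ensurenl` lexer option), so the token stream can contain a '\n' the original file content never had. A minimal sketch of that behaviour, assuming a stock pygments install:

    from pygments import lex
    from pygments.lexers.special import TextLexer

    # With the default ensurenl=True, lex() pads input that lacks a
    # trailing newline, so the emitted token text no longer matches
    # the raw file content:
    tokens = list(lex(u'no trailing newline', TextLexer()))
    assert tokens[-1][1].endswith(u'\n')

Bypassing the lexer entirely for TextLexer (see the tokenize_string() hunk below) keeps plain-text content byte-identical.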
@@ -24,6 +24,8 @@ from itertools import groupby
 
 from pygments import lex
 from pygments.formatters.html import _get_ttype_class as pygment_token_class
+from pygments.lexers.special import TextLexer, Token
+
 from rhodecode.lib.helpers import (
     get_lexer_for_filenode, html_escape, get_custom_lexer)
 from rhodecode.lib.utils2 import AttributeDict
@@ -45,7 +47,7 @@ def filenode_as_lines_tokens(filenode, l
     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
               lexer, filenode, org_lexer)
     tokens = tokenize_string(filenode.content, lexer)
-    lines = split_token_stream(tokens, split_string='\n')
+    lines = split_token_stream(tokens)
     rv = list(lines)
     return rv
 
@@ -59,22 +61,28 @@ def tokenize_string(content, lexer):
     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
-    for token_type, token_text in lex(content, lexer):
+
+    if isinstance(lexer, TextLexer):
+        lexed = [(Token.Text, content)]
+    else:
+        lexed = lex(content, lexer)
+
+    for token_type, token_text in lexed:
         yield pygment_token_class(token_type), token_text
 
 
-def split_token_stream(tokens, split_string=u'\n'):
+def split_token_stream(tokens):
     """
     Take a list of (TokenType, text) tuples and split them by a string
 
-    >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
+    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
     [(TEXT, 'some'), (TEXT, 'text'),
      (TEXT, 'more'), (TEXT, 'text')]
     """
 
     buffer = []
     for token_class, token_text in tokens:
-        parts = token_text.split(split_string)
+        parts = token_text.split('\n')
         for part in parts[:-1]:
             buffer.append((token_class, part))
             yield buffer
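Taken together with the updated tests at the bottom of this commit, the split_token_stream() contract now looks like this (a sketch; 'text' stands in for the class string pygment_token_class() returns):

    stream = [('text', u'some\ntext'), ('text', u'more\n')]
    lines = list(split_token_stream(stream))
    # one list of (token_class, text) pairs per rendered line:
    #   [('text', u'some')]
    #   [('text', u'text'), ('text', u'more')]
    #   [('text', u'')]   <- the trailing '\n' yields an empty last line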
@@ -183,13 +183,12 @@ class DiffProcessor(object):
 
         :param string:
         """
-
         self.cur_diff_size += len(string)
 
         if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
             raise DiffLimitExceeded('Diff Limit Exceeded')
 
-        return safe_unicode(string)\
+        return string \
             .replace('&', '&amp;')\
             .replace('<', '&lt;')\
             .replace('>', '&gt;')
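The escaping itself is unchanged; only the safe_unicode() coercion moves out, since diff_splitter() (added further down) already yields unicode. For a line within the size limit the effect is simply:

    # self._escaper(u'if a<b & c>d:') == u'if a&lt;b &amp; c&gt;d:'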
@@ -278,7 +277,7 @@ class DiffProcessor(object):
         for chunk in self._diff.chunks():
             head = chunk.header
 
-            diff = imap(self._escaper, chunk.diff.splitlines(1))
+            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
             raw_diff = chunk.raw
             limited_diff = False
             exceeds_limit = False
@@ -529,7 +528,8 @@ class DiffProcessor(object):
 
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
-                diff = iter(chunk.diff.splitlines(1))
+                # simulate splitlines, so we keep the line end part
+                diff = self.diff_splitter(chunk.diff)
 
                 # append each file to the diff size
                 raw_chunk_size = len(raw_diff)
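The new comment refers to splitlines with keepends, whose shape diff_splitter() reproduces for '\n'-separated text:

    'a\nb\n'.splitlines(1)  # ['a\n', 'b\n'] - line endings are kept
    'a\nb'.splitlines(1)    # ['a\n', 'b']  - last piece has no '\n' to keep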
@@ -608,18 +608,17 @@ class DiffProcessor(object):
         return diff_container(sorted(_files, key=sorter))
 
     # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
-    def _parse_lines(self, diff):
+    def _parse_lines(self, diff_iter):
         """
         Parse the diff an return data for the template.
         """
 
-        lineiter = iter(diff)
         stats = [0, 0]
         chunks = []
         raw_diff = []
 
         try:
-            line = lineiter.next()
+            line = diff_iter.next()
 
             while line:
                 raw_diff.append(line)
@@ -651,7 +650,7 @@ class DiffProcessor(object):
                             'line': line,
                         })
 
-                line = lineiter.next()
+                line = diff_iter.next()
 
                 while old_line < old_end or new_line < new_end:
                     command = ' '
@@ -686,7 +685,7 @@ class DiffProcessor(object):
                         })
                         raw_diff.append(line)
 
-                        line = lineiter.next()
+                        line = diff_iter.next()
 
                         if self._newline_marker.match(line):
                             # we need to append to lines, since this is not
@@ -712,13 +711,12 @@ class DiffProcessor(object):
         chunks = []
         raw_diff = []
 
-        diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
-
         try:
             line = diff_iter.next()
 
             while line:
                 raw_diff.append(line)
+                # match header e.g @@ -0,0 +1 @@\n'
                 match = self._chunk_re.match(line)
 
                 if not match:
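The exact _chunk_re pattern sits outside this diff; a representative regex for the header format named in the new comment (an assumption, not RhodeCode's actual pattern) would be:

    import re

    # Hypothetical stand-in for self._chunk_re: matches '@@ -0,0 +1 @@'-style
    # hunk headers with optional line counts.
    chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?(.*)')
    assert chunk_re.match('@@ -0,0 +1 @@\n')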
@@ -826,6 +824,32 @@ class DiffProcessor(object):
         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
         return idstring
 
+    @classmethod
+    def diff_splitter(cls, string):
+        """
+        Diff split that emulates .splitlines() but works only on \n
+        """
+        if not string:
+            return
+        elif string == '\n':
+            yield u'\n'
+        else:
+
+            has_newline = string.endswith('\n')
+            elements = string.split('\n')
+            if has_newline:
+                # skip last element as it's empty string from newlines
+                elements = elements[:-1]
+
+            len_elements = len(elements)
+
+            for cnt, line in enumerate(elements, start=1):
+                last_line = cnt == len_elements
+                if last_line and not has_newline:
+                    yield safe_unicode(line)
+                else:
+                    yield safe_unicode(line) + '\n'
+
     def prepare(self, inline_diff=True):
         """
         Prepare the passed udiff for HTML rendering.
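For '\n'-separated input the helper matches splitlines(True) — the property the new test at the end of this commit asserts — while always yielding unicode and deliberately ignoring the extra line boundaries ('\r', u'\x85', ...) that Python's unicode splitlines() would honour. A quick sanity sketch of the contract:

    list(DiffProcessor.diff_splitter(''))        # []
    list(DiffProcessor.diff_splitter('\n'))      # [u'\n']
    list(DiffProcessor.diff_splitter('a\nb'))    # [u'a\n', u'b']
    list(DiffProcessor.diff_splitter('a\nb\n'))  # [u'a\n', u'b\n']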
@@ -89,29 +89,9 @@ class TestSplitTokenStream(object):
             [('type2', u'')],
         ]
 
-    def test_split_token_stream_other_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='m'))
-
-        assert lines == [
-            [('type1', 'so')],
-            [('type1', 'e\ntext'), ('type2', '')],
-            [('type2', 'ore\n')],
-        ]
-
-    def test_split_token_stream_without_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='z'))
-
-        assert lines == [
-            [('type1', 'some\ntext'), ('type2', 'more\n')]
-        ]
-
     def test_split_token_stream_single(self):
         lines = list(split_token_stream(
-            [('type1', '\n')], split_string='\n'))
+            [('type1', '\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -120,7 +100,7 @@ class TestSplitTokenStream(object):
 
     def test_split_token_stream_single_repeat(self):
         lines = list(split_token_stream(
-            [('type1', '\n\n\n')], split_string='\n'))
+            [('type1', '\n\n\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -131,7 +111,7 @@ class TestSplitTokenStream(object):
 
     def test_split_token_stream_multiple_repeat(self):
         lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
+            [('type1', '\n\n'), ('type2', '\n\n')]))
 
         assert lines == [
             [('type1', '')],
@@ -26,7 +26,7 @@ from rhodecode.lib.diffs import (
     DiffProcessor,
     NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
     CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
-from rhodecode.tests.fixture import Fixture
+from rhodecode.tests.fixture import Fixture, no_newline_id_generator
 from rhodecode.lib.vcs.backends.git.repository import GitDiff
 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
@@ -162,7 +162,7 @@ def test_diffprocessor_as_html_with_comm
     assert html == expected_html
 
 
-class TestMixedFilenameEncodings:
+class TestMixedFilenameEncodings(object):
 
     @pytest.fixture(scope="class")
     def raw_diff(self):
@@ -811,3 +811,21 @@ def test_diff_lib_newlines(diff_fixture_
     data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
             for x in diff_proc_d]
     assert expected_data == data
+
+
+@pytest.mark.parametrize('input_str', [
+    '',
+    '\n',
+    '\n\n',
+    'First\n+second',
+    'First\n+second\n',
+
+    '\n\n\n Multi \n\n\n',
+    '\n\n\n Multi beginning',
+    'Multi end \n\n\n',
+    'Multi end',
+    '@@ -0,0 +1 @@\n+test_content \n\n b\n'
+], ids=no_newline_id_generator)
+def test_splitlines(input_str):
+    result = DiffProcessor.diff_splitter(input_str)
+    assert list(result) == input_str.splitlines(True)
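no_newline_id_generator comes from the test fixtures and is not part of this diff; presumably it turns the raw parametrized strings into readable pytest ids. A hypothetical stand-in with that shape:

    def no_newline_id_generator(value):
        # Hypothetical sketch only: pytest calls this for each parametrized
        # value; strip newlines so the generated ids stay on one line.
        if isinstance(value, basestring):
            return value.replace('\n', '-N-') or 'EMPTY'
        return value  # let pytest derive the id itself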