diffs: fixed case of bogus files diff rendering...
r3444:e5ce0962 default
@@ -1,776 +1,786 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2019 RhodeCode GmbH
3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 import difflib
23 from itertools import groupby
23 from itertools import groupby
24
24
25 from pygments import lex
25 from pygments import lex
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from pygments.lexers.special import TextLexer, Token
27 from pygments.lexers.special import TextLexer, Token
28 from pygments.lexers import get_lexer_by_name
28 from pygments.lexers import get_lexer_by_name
29 from pyramid import compat
29 from pyramid import compat
30
30
31 from rhodecode.lib.helpers import (
31 from rhodecode.lib.helpers import (
32 get_lexer_for_filenode, html_escape, get_custom_lexer)
32 get_lexer_for_filenode, html_escape, get_custom_lexer)
33 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
33 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
34 from rhodecode.lib.vcs.nodes import FileNode
34 from rhodecode.lib.vcs.nodes import FileNode
35 from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
35 from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
36 from rhodecode.lib.diff_match_patch import diff_match_patch
36 from rhodecode.lib.diff_match_patch import diff_match_patch
37 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
37 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
38
38
39
39
40 plain_text_lexer = get_lexer_by_name(
40 plain_text_lexer = get_lexer_by_name(
41 'text', stripall=False, stripnl=False, ensurenl=False)
41 'text', stripall=False, stripnl=False, ensurenl=False)
42
42
43
43
44 log = logging.getLogger(__name__)
44 log = logging.getLogger(__name__)
45
45
46
46
47 def filenode_as_lines_tokens(filenode, lexer=None):
47 def filenode_as_lines_tokens(filenode, lexer=None):
48 org_lexer = lexer
48 org_lexer = lexer
49 lexer = lexer or get_lexer_for_filenode(filenode)
49 lexer = lexer or get_lexer_for_filenode(filenode)
50 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
50 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
51 lexer, filenode, org_lexer)
51 lexer, filenode, org_lexer)
52 tokens = tokenize_string(filenode.content, lexer)
52 content = filenode.content
53 lines = split_token_stream(tokens)
53 tokens = tokenize_string(content, lexer)
54 lines = split_token_stream(tokens, content)
54 rv = list(lines)
55 rv = list(lines)
55 return rv
56 return rv
56
57
57
58
58 def tokenize_string(content, lexer):
59 def tokenize_string(content, lexer):
59 """
60 """
60 Use pygments to tokenize some content based on a lexer
61 Use pygments to tokenize some content based on a lexer
61 ensuring all original newlines and whitespace are preserved
62 ensuring all original newlines and whitespace are preserved
62 """
63 """
63
64
64 lexer.stripall = False
65 lexer.stripall = False
65 lexer.stripnl = False
66 lexer.stripnl = False
66 lexer.ensurenl = False
67 lexer.ensurenl = False
67
68
68 if isinstance(lexer, TextLexer):
69 if isinstance(lexer, TextLexer):
69 lexed = [(Token.Text, content)]
70 lexed = [(Token.Text, content)]
70 else:
71 else:
71 lexed = lex(content, lexer)
72 lexed = lex(content, lexer)
72
73
73 for token_type, token_text in lexed:
74 for token_type, token_text in lexed:
74 yield pygment_token_class(token_type), token_text
75 yield pygment_token_class(token_type), token_text
75
76
76
77
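# Illustrative sketch (hypothetical helper, not part of the original module):
# shows how tokenize_string is typically driven, mirroring what
# filenode_as_lines_tokens does above. The short CSS-style classes in the
# sample output come from pygments' token-class mapping and may vary slightly
# between pygments versions.
def _tokenize_string_example():
    example_lexer = get_lexer_by_name(
        'python', stripall=False, stripnl=False, ensurenl=False)
    return list(tokenize_string(u'var = 6\n', example_lexer))
    # e.g. [('n', u'var'), ('', u' '), ('o', u'='), ('', u' '), ('mi', u'6'), ('', u'\n')]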
77 def split_token_stream(tokens):
78 def split_token_stream(tokens, content):
78 """
79 """
79 Take a list of (TokenType, text) tuples and split them into per-line lists on newlines
80 Take a list of (TokenType, text) tuples and split them into per-line lists on newlines
80
81
81 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
82 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
82 [[(TEXT, 'some')],
83 [[(TEXT, 'some')],
83 [(TEXT, 'text'), (TEXT, 'more')], [(TEXT, '')]]
84 [(TEXT, 'text'), (TEXT, 'more')], [(TEXT, '')]]
84 """
85 """
85
86
86 buffer = []
87 token_buffer = []
87 for token_class, token_text in tokens:
88 for token_class, token_text in tokens:
88 parts = token_text.split('\n')
89 parts = token_text.split('\n')
89 for part in parts[:-1]:
90 for part in parts[:-1]:
90 buffer.append((token_class, part))
91 token_buffer.append((token_class, part))
91 yield buffer
92 yield token_buffer
92 buffer = []
93 token_buffer = []
94
95 token_buffer.append((token_class, parts[-1]))
93
96
94 buffer.append((token_class, parts[-1]))
97 if token_buffer:
95
98 yield token_buffer
96 if buffer:
99 elif content:
97 yield buffer
100 # this is a special case: we have the content, but tokenization didn't produce
101 # any results. This can happen if a file with a known extension like .css contains
102 # bogus unicode content without any newline characters
103 yield [(pygment_token_class(Token.Text), content)]
98
104
99
105
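# Illustrative sketch (hypothetical helper, not part of the original module):
# split_token_stream yields one token list per line; when the lexer produced
# no tokens at all but the raw content is non-empty (the bogus-file case this
# change addresses), a single plain-text line is yielded instead of nothing.
def _split_token_stream_example():
    regular = list(split_token_stream(
        [('type1', 'some\ntext'), ('type2', 'more\n')], 'some\ntextmore\n'))
    # -> [[('type1', 'some')],
    #     [('type1', 'text'), ('type2', 'more')],
    #     [('type2', '')]]
    fallback = list(split_token_stream([], u'\ufeff'))
    # -> [[('', u'\ufeff')]]
    return regular, fallback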
100 def filenode_as_annotated_lines_tokens(filenode):
106 def filenode_as_annotated_lines_tokens(filenode):
101 """
107 """
102 Take a file node and return a list of annotations => lines, if no annotation
108 Take a file node and return a list of annotations => lines, if no annotation
103 is found, it will be None.
109 is found, it will be None.
104
110
105 eg:
111 eg:
106
112
107 [
113 [
108 (annotation1, [
114 (annotation1, [
109 (1, line1_tokens_list),
115 (1, line1_tokens_list),
110 (2, line2_tokens_list),
116 (2, line2_tokens_list),
111 ]),
117 ]),
112 (annotation2, [
118 (annotation2, [
113 (3, line1_tokens_list),
119 (3, line1_tokens_list),
114 ]),
120 ]),
115 (None, [
121 (None, [
116 (4, line1_tokens_list),
122 (4, line1_tokens_list),
117 ]),
123 ]),
118 (annotation1, [
124 (annotation1, [
119 (5, line1_tokens_list),
125 (5, line1_tokens_list),
120 (6, line2_tokens_list),
126 (6, line2_tokens_list),
121 ])
127 ])
122 ]
128 ]
123 """
129 """
124
130
125 commit_cache = {} # cache commit_getter lookups
131 commit_cache = {} # cache commit_getter lookups
126
132
127 def _get_annotation(commit_id, commit_getter):
133 def _get_annotation(commit_id, commit_getter):
128 if commit_id not in commit_cache:
134 if commit_id not in commit_cache:
129 commit_cache[commit_id] = commit_getter()
135 commit_cache[commit_id] = commit_getter()
130 return commit_cache[commit_id]
136 return commit_cache[commit_id]
131
137
132 annotation_lookup = {
138 annotation_lookup = {
133 line_no: _get_annotation(commit_id, commit_getter)
139 line_no: _get_annotation(commit_id, commit_getter)
134 for line_no, commit_id, commit_getter, line_content
140 for line_no, commit_id, commit_getter, line_content
135 in filenode.annotate
141 in filenode.annotate
136 }
142 }
137
143
138 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
144 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
139 for line_no, tokens
145 for line_no, tokens
140 in enumerate(filenode_as_lines_tokens(filenode), 1))
146 in enumerate(filenode_as_lines_tokens(filenode), 1))
141
147
142 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
148 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
143
149
144 for annotation, group in grouped_annotations_lines:
150 for annotation, group in grouped_annotations_lines:
145 yield (
151 yield (
146 annotation, [(line_no, tokens)
152 annotation, [(line_no, tokens)
147 for (_, line_no, tokens) in group]
153 for (_, line_no, tokens) in group]
148 )
154 )
149
155
150
156
151 def render_tokenstream(tokenstream):
157 def render_tokenstream(tokenstream):
152 result = []
158 result = []
153 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
159 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
154
160
155 if token_class:
161 if token_class:
156 result.append(u'<span class="%s">' % token_class)
162 result.append(u'<span class="%s">' % token_class)
157 else:
163 else:
158 result.append(u'<span>')
164 result.append(u'<span>')
159
165
160 for op_tag, token_text in token_ops_texts:
166 for op_tag, token_text in token_ops_texts:
161
167
162 if op_tag:
168 if op_tag:
163 result.append(u'<%s>' % op_tag)
169 result.append(u'<%s>' % op_tag)
164
170
165 escaped_text = html_escape(token_text)
171 escaped_text = html_escape(token_text)
166
172
167 # TODO: dan: investigate showing hidden characters like space/nl/tab
173 # TODO: dan: investigate showing hidden characters like space/nl/tab
168 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
174 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
169 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
175 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
170 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
176 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
171
177
172 result.append(escaped_text)
178 result.append(escaped_text)
173
179
174 if op_tag:
180 if op_tag:
175 result.append(u'</%s>' % op_tag)
181 result.append(u'</%s>' % op_tag)
176
182
177 result.append(u'</span>')
183 result.append(u'</span>')
178
184
179 html = ''.join(result)
185 html = ''.join(result)
180 return html
186 return html
181
187
182
188
183 def rollup_tokenstream(tokenstream):
189 def rollup_tokenstream(tokenstream):
184 """
190 """
185 Group a token stream of the format:
191 Group a token stream of the format:
186
192
187 ('class', 'op', 'text')
193 ('class', 'op', 'text')
188 or
194 or
189 ('class', 'text')
195 ('class', 'text')
190
196
191 into
197 into
192
198
193 [('class1',
199 [('class1',
194 [('op1', 'text'),
200 [('op1', 'text'),
195 ('op2', 'text')]),
201 ('op2', 'text')]),
196 ('class2',
202 ('class2',
197 [('op3', 'text')])]
203 [('op3', 'text')])]
198
204
199 This is used to get the minimal tags necessary when
205 This is used to get the minimal tags necessary when
200 rendering to html, e.g. for a token stream:
206 rendering to html, e.g. for a token stream:
201
207
202 <span class="A"><ins>he</ins>llo</span>
208 <span class="A"><ins>he</ins>llo</span>
203 vs
209 vs
204 <span class="A"><ins>he</ins></span><span class="A">llo</span>
210 <span class="A"><ins>he</ins></span><span class="A">llo</span>
205
211
206 If a 2 tuple is passed in, the output op will be an empty string.
212 If a 2 tuple is passed in, the output op will be an empty string.
207
213
208 eg:
214 eg:
209
215
210 >>> rollup_tokenstream([('classA', '', 'h'),
216 >>> rollup_tokenstream([('classA', '', 'h'),
211 ('classA', 'del', 'ell'),
217 ('classA', 'del', 'ell'),
212 ('classA', '', 'o'),
218 ('classA', '', 'o'),
213 ('classB', '', ' '),
219 ('classB', '', ' '),
214 ('classA', '', 'the'),
220 ('classA', '', 'the'),
215 ('classA', '', 're'),
221 ('classA', '', 're'),
216 ])
222 ])
217
223
218 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
224 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
219 ('classB', [('', ' ')]),
225 ('classB', [('', ' ')]),
220 ('classA', [('', 'there')])]
226 ('classA', [('', 'there')])]
221
227
222 """
228 """
223 if tokenstream and len(tokenstream[0]) == 2:
229 if tokenstream and len(tokenstream[0]) == 2:
224 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
230 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
225
231
226 result = []
232 result = []
227 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
233 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
228 ops = []
234 ops = []
229 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
235 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
230 text_buffer = []
236 text_buffer = []
231 for t_class, t_op, t_text in token_text_list:
237 for t_class, t_op, t_text in token_text_list:
232 text_buffer.append(t_text)
238 text_buffer.append(t_text)
233 ops.append((token_op, ''.join(text_buffer)))
239 ops.append((token_op, ''.join(text_buffer)))
234 result.append((token_class, ops))
240 result.append((token_class, ops))
235 return result
241 return result
236
242
237
243
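# Illustrative sketch (hypothetical helper, not part of the original module):
# because rollup_tokenstream merges consecutive tokens of the same class,
# render_tokenstream emits one <span> per class run rather than one per token.
def _render_tokenstream_example():
    return render_tokenstream(
        [('A', '', 'he'), ('A', 'ins', 'll'), ('A', '', 'o')])
    # -> '<span class="A">he<ins>ll</ins>o</span>'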
238 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
244 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
239 """
245 """
240 Converts a list of (token_class, token_text) tuples to a list of
246 Converts a list of (token_class, token_text) tuples to a list of
241 (token_class, token_op, token_text) tuples where token_op is one of
247 (token_class, token_op, token_text) tuples where token_op is one of
242 ('ins', 'del', '')
248 ('ins', 'del', '')
243
249
244 :param old_tokens: list of (token_class, token_text) tuples of old line
250 :param old_tokens: list of (token_class, token_text) tuples of old line
245 :param new_tokens: list of (token_class, token_text) tuples of new line
251 :param new_tokens: list of (token_class, token_text) tuples of new line
246 :param use_diff_match_patch: boolean, will use google's diff match patch
252 :param use_diff_match_patch: boolean, will use google's diff match patch
247 library which has options to 'smooth' out the character by character
253 library which has options to 'smooth' out the character by character
248 differences making nicer ins/del blocks
254 differences making nicer ins/del blocks
249 """
255 """
250
256
251 old_tokens_result = []
257 old_tokens_result = []
252 new_tokens_result = []
258 new_tokens_result = []
253
259
254 similarity = difflib.SequenceMatcher(None,
260 similarity = difflib.SequenceMatcher(None,
255 ''.join(token_text for token_class, token_text in old_tokens),
261 ''.join(token_text for token_class, token_text in old_tokens),
256 ''.join(token_text for token_class, token_text in new_tokens)
262 ''.join(token_text for token_class, token_text in new_tokens)
257 ).ratio()
263 ).ratio()
258
264
259 if similarity < 0.6: # return, the blocks are too different
265 if similarity < 0.6: # return, the blocks are too different
260 for token_class, token_text in old_tokens:
266 for token_class, token_text in old_tokens:
261 old_tokens_result.append((token_class, '', token_text))
267 old_tokens_result.append((token_class, '', token_text))
262 for token_class, token_text in new_tokens:
268 for token_class, token_text in new_tokens:
263 new_tokens_result.append((token_class, '', token_text))
269 new_tokens_result.append((token_class, '', token_text))
264 return old_tokens_result, new_tokens_result, similarity
270 return old_tokens_result, new_tokens_result, similarity
265
271
266 token_sequence_matcher = difflib.SequenceMatcher(None,
272 token_sequence_matcher = difflib.SequenceMatcher(None,
267 [x[1] for x in old_tokens],
273 [x[1] for x in old_tokens],
268 [x[1] for x in new_tokens])
274 [x[1] for x in new_tokens])
269
275
270 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
276 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
271 # check the differences by token block types first to give a
277 # check the differences by token block types first to give a
272 # nicer "block" level replacement vs character diffs
278 # nicer "block" level replacement vs character diffs
273
279
274 if tag == 'equal':
280 if tag == 'equal':
275 for token_class, token_text in old_tokens[o1:o2]:
281 for token_class, token_text in old_tokens[o1:o2]:
276 old_tokens_result.append((token_class, '', token_text))
282 old_tokens_result.append((token_class, '', token_text))
277 for token_class, token_text in new_tokens[n1:n2]:
283 for token_class, token_text in new_tokens[n1:n2]:
278 new_tokens_result.append((token_class, '', token_text))
284 new_tokens_result.append((token_class, '', token_text))
279 elif tag == 'delete':
285 elif tag == 'delete':
280 for token_class, token_text in old_tokens[o1:o2]:
286 for token_class, token_text in old_tokens[o1:o2]:
281 old_tokens_result.append((token_class, 'del', token_text))
287 old_tokens_result.append((token_class, 'del', token_text))
282 elif tag == 'insert':
288 elif tag == 'insert':
283 for token_class, token_text in new_tokens[n1:n2]:
289 for token_class, token_text in new_tokens[n1:n2]:
284 new_tokens_result.append((token_class, 'ins', token_text))
290 new_tokens_result.append((token_class, 'ins', token_text))
285 elif tag == 'replace':
291 elif tag == 'replace':
286 # if same type token blocks must be replaced, do a diff on the
292 # if same type token blocks must be replaced, do a diff on the
287 # characters in the token blocks to show individual changes
293 # characters in the token blocks to show individual changes
288
294
289 old_char_tokens = []
295 old_char_tokens = []
290 new_char_tokens = []
296 new_char_tokens = []
291 for token_class, token_text in old_tokens[o1:o2]:
297 for token_class, token_text in old_tokens[o1:o2]:
292 for char in token_text:
298 for char in token_text:
293 old_char_tokens.append((token_class, char))
299 old_char_tokens.append((token_class, char))
294
300
295 for token_class, token_text in new_tokens[n1:n2]:
301 for token_class, token_text in new_tokens[n1:n2]:
296 for char in token_text:
302 for char in token_text:
297 new_char_tokens.append((token_class, char))
303 new_char_tokens.append((token_class, char))
298
304
299 old_string = ''.join([token_text for
305 old_string = ''.join([token_text for
300 token_class, token_text in old_char_tokens])
306 token_class, token_text in old_char_tokens])
301 new_string = ''.join([token_text for
307 new_string = ''.join([token_text for
302 token_class, token_text in new_char_tokens])
308 token_class, token_text in new_char_tokens])
303
309
304 char_sequence = difflib.SequenceMatcher(
310 char_sequence = difflib.SequenceMatcher(
305 None, old_string, new_string)
311 None, old_string, new_string)
306 copcodes = char_sequence.get_opcodes()
312 copcodes = char_sequence.get_opcodes()
307 obuffer, nbuffer = [], []
313 obuffer, nbuffer = [], []
308
314
309 if use_diff_match_patch:
315 if use_diff_match_patch:
310 dmp = diff_match_patch()
316 dmp = diff_match_patch()
311 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
317 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
312 reps = dmp.diff_main(old_string, new_string)
318 reps = dmp.diff_main(old_string, new_string)
313 dmp.diff_cleanupEfficiency(reps)
319 dmp.diff_cleanupEfficiency(reps)
314
320
315 a, b = 0, 0
321 a, b = 0, 0
316 for op, rep in reps:
322 for op, rep in reps:
317 l = len(rep)
323 l = len(rep)
318 if op == 0:
324 if op == 0:
319 for i, c in enumerate(rep):
325 for i, c in enumerate(rep):
320 obuffer.append((old_char_tokens[a+i][0], '', c))
326 obuffer.append((old_char_tokens[a+i][0], '', c))
321 nbuffer.append((new_char_tokens[b+i][0], '', c))
327 nbuffer.append((new_char_tokens[b+i][0], '', c))
322 a += l
328 a += l
323 b += l
329 b += l
324 elif op == -1:
330 elif op == -1:
325 for i, c in enumerate(rep):
331 for i, c in enumerate(rep):
326 obuffer.append((old_char_tokens[a+i][0], 'del', c))
332 obuffer.append((old_char_tokens[a+i][0], 'del', c))
327 a += l
333 a += l
328 elif op == 1:
334 elif op == 1:
329 for i, c in enumerate(rep):
335 for i, c in enumerate(rep):
330 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
336 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
331 b += l
337 b += l
332 else:
338 else:
333 for ctag, co1, co2, cn1, cn2 in copcodes:
339 for ctag, co1, co2, cn1, cn2 in copcodes:
334 if ctag == 'equal':
340 if ctag == 'equal':
335 for token_class, token_text in old_char_tokens[co1:co2]:
341 for token_class, token_text in old_char_tokens[co1:co2]:
336 obuffer.append((token_class, '', token_text))
342 obuffer.append((token_class, '', token_text))
337 for token_class, token_text in new_char_tokens[cn1:cn2]:
343 for token_class, token_text in new_char_tokens[cn1:cn2]:
338 nbuffer.append((token_class, '', token_text))
344 nbuffer.append((token_class, '', token_text))
339 elif ctag == 'delete':
345 elif ctag == 'delete':
340 for token_class, token_text in old_char_tokens[co1:co2]:
346 for token_class, token_text in old_char_tokens[co1:co2]:
341 obuffer.append((token_class, 'del', token_text))
347 obuffer.append((token_class, 'del', token_text))
342 elif ctag == 'insert':
348 elif ctag == 'insert':
343 for token_class, token_text in new_char_tokens[cn1:cn2]:
349 for token_class, token_text in new_char_tokens[cn1:cn2]:
344 nbuffer.append((token_class, 'ins', token_text))
350 nbuffer.append((token_class, 'ins', token_text))
345 elif ctag == 'replace':
351 elif ctag == 'replace':
346 for token_class, token_text in old_char_tokens[co1:co2]:
352 for token_class, token_text in old_char_tokens[co1:co2]:
347 obuffer.append((token_class, 'del', token_text))
353 obuffer.append((token_class, 'del', token_text))
348 for token_class, token_text in new_char_tokens[cn1:cn2]:
354 for token_class, token_text in new_char_tokens[cn1:cn2]:
349 nbuffer.append((token_class, 'ins', token_text))
355 nbuffer.append((token_class, 'ins', token_text))
350
356
351 old_tokens_result.extend(obuffer)
357 old_tokens_result.extend(obuffer)
352 new_tokens_result.extend(nbuffer)
358 new_tokens_result.extend(nbuffer)
353
359
354 return old_tokens_result, new_tokens_result, similarity
360 return old_tokens_result, new_tokens_result, similarity
355
361
356
362
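# Illustrative sketch (hypothetical helper, not part of the original module):
# tokens_diff tags characters that exist only on the old side with 'del' and
# characters that exist only on the new side with 'ins'; unchanged characters
# keep an empty op. The exact grouping of the ins/del runs depends on
# diff_match_patch's cleanup heuristics.
def _tokens_diff_example():
    old_line = [('', u'the quick brown fox')]
    new_line = [('', u'the quick red fox')]
    old_ops, new_ops, similarity = tokens_diff(old_line, new_line)
    # old_ops contains ('', 'del', ...) entries covering "brown",
    # new_ops contains ('', 'ins', ...) entries covering "red",
    # similarity is the difflib ratio between the two raw strings.
    return old_ops, new_ops, similarity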
357 def diffset_node_getter(commit):
363 def diffset_node_getter(commit):
358 def get_node(fname):
364 def get_node(fname):
359 try:
365 try:
360 return commit.get_node(fname)
366 return commit.get_node(fname)
361 except NodeDoesNotExistError:
367 except NodeDoesNotExistError:
362 return None
368 return None
363
369
364 return get_node
370 return get_node
365
371
366
372
367 class DiffSet(object):
373 class DiffSet(object):
368 """
374 """
369 An object for parsing the diff result from diffs.DiffProcessor and
375 An object for parsing the diff result from diffs.DiffProcessor and
370 adding highlighting, side by side/unified renderings and line diffs
376 adding highlighting, side by side/unified renderings and line diffs
371 """
377 """
372
378
373 HL_REAL = 'REAL' # highlights using original file, slow
379 HL_REAL = 'REAL' # highlights using original file, slow
374 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
380 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
375 # in the case of multiline code
381 # in the case of multiline code
376 HL_NONE = 'NONE' # no highlighting, fastest
382 HL_NONE = 'NONE' # no highlighting, fastest
377
383
378 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
384 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
379 source_repo_name=None,
385 source_repo_name=None,
380 source_node_getter=lambda filename: None,
386 source_node_getter=lambda filename: None,
381 target_repo_name=None,
387 target_repo_name=None,
382 target_node_getter=lambda filename: None,
388 target_node_getter=lambda filename: None,
383 source_nodes=None, target_nodes=None,
389 source_nodes=None, target_nodes=None,
384 # files over this size will use fast highlighting
390 # files over this size will use fast highlighting
385 max_file_size_limit=150 * 1024,
391 max_file_size_limit=150 * 1024,
386 ):
392 ):
387
393
388 self.highlight_mode = highlight_mode
394 self.highlight_mode = highlight_mode
389 self.highlighted_filenodes = {}
395 self.highlighted_filenodes = {}
390 self.source_node_getter = source_node_getter
396 self.source_node_getter = source_node_getter
391 self.target_node_getter = target_node_getter
397 self.target_node_getter = target_node_getter
392 self.source_nodes = source_nodes or {}
398 self.source_nodes = source_nodes or {}
393 self.target_nodes = target_nodes or {}
399 self.target_nodes = target_nodes or {}
394 self.repo_name = repo_name
400 self.repo_name = repo_name
395 self.target_repo_name = target_repo_name or repo_name
401 self.target_repo_name = target_repo_name or repo_name
396 self.source_repo_name = source_repo_name or repo_name
402 self.source_repo_name = source_repo_name or repo_name
397 self.max_file_size_limit = max_file_size_limit
403 self.max_file_size_limit = max_file_size_limit
398
404
399 def render_patchset(self, patchset, source_ref=None, target_ref=None):
405 def render_patchset(self, patchset, source_ref=None, target_ref=None):
400 diffset = AttributeDict(dict(
406 diffset = AttributeDict(dict(
401 lines_added=0,
407 lines_added=0,
402 lines_deleted=0,
408 lines_deleted=0,
403 changed_files=0,
409 changed_files=0,
404 files=[],
410 files=[],
405 file_stats={},
411 file_stats={},
406 limited_diff=isinstance(patchset, LimitedDiffContainer),
412 limited_diff=isinstance(patchset, LimitedDiffContainer),
407 repo_name=self.repo_name,
413 repo_name=self.repo_name,
408 target_repo_name=self.target_repo_name,
414 target_repo_name=self.target_repo_name,
409 source_repo_name=self.source_repo_name,
415 source_repo_name=self.source_repo_name,
410 source_ref=source_ref,
416 source_ref=source_ref,
411 target_ref=target_ref,
417 target_ref=target_ref,
412 ))
418 ))
413 for patch in patchset:
419 for patch in patchset:
414 diffset.file_stats[patch['filename']] = patch['stats']
420 diffset.file_stats[patch['filename']] = patch['stats']
415 filediff = self.render_patch(patch)
421 filediff = self.render_patch(patch)
416 filediff.diffset = StrictAttributeDict(dict(
422 filediff.diffset = StrictAttributeDict(dict(
417 source_ref=diffset.source_ref,
423 source_ref=diffset.source_ref,
418 target_ref=diffset.target_ref,
424 target_ref=diffset.target_ref,
419 repo_name=diffset.repo_name,
425 repo_name=diffset.repo_name,
420 source_repo_name=diffset.source_repo_name,
426 source_repo_name=diffset.source_repo_name,
421 target_repo_name=diffset.target_repo_name,
427 target_repo_name=diffset.target_repo_name,
422 ))
428 ))
423 diffset.files.append(filediff)
429 diffset.files.append(filediff)
424 diffset.changed_files += 1
430 diffset.changed_files += 1
425 if not patch['stats']['binary']:
431 if not patch['stats']['binary']:
426 diffset.lines_added += patch['stats']['added']
432 diffset.lines_added += patch['stats']['added']
427 diffset.lines_deleted += patch['stats']['deleted']
433 diffset.lines_deleted += patch['stats']['deleted']
428
434
429 return diffset
435 return diffset
430
436
431 _lexer_cache = {}
437 _lexer_cache = {}
432
438
433 def _get_lexer_for_filename(self, filename, filenode=None):
439 def _get_lexer_for_filename(self, filename, filenode=None):
434 # cached because we might need to call it twice for source/target
440 # cached because we might need to call it twice for source/target
435 if filename not in self._lexer_cache:
441 if filename not in self._lexer_cache:
436 if filenode:
442 if filenode:
437 lexer = filenode.lexer
443 lexer = filenode.lexer
438 extension = filenode.extension
444 extension = filenode.extension
439 else:
445 else:
440 lexer = FileNode.get_lexer(filename=filename)
446 lexer = FileNode.get_lexer(filename=filename)
441 extension = filename.split('.')[-1]
447 extension = filename.split('.')[-1]
442
448
443 lexer = get_custom_lexer(extension) or lexer
449 lexer = get_custom_lexer(extension) or lexer
444 self._lexer_cache[filename] = lexer
450 self._lexer_cache[filename] = lexer
445 return self._lexer_cache[filename]
451 return self._lexer_cache[filename]
446
452
447 def render_patch(self, patch):
453 def render_patch(self, patch):
448 log.debug('rendering diff for %r', patch['filename'])
454 log.debug('rendering diff for %r', patch['filename'])
449
455
450 source_filename = patch['original_filename']
456 source_filename = patch['original_filename']
451 target_filename = patch['filename']
457 target_filename = patch['filename']
452
458
453 source_lexer = plain_text_lexer
459 source_lexer = plain_text_lexer
454 target_lexer = plain_text_lexer
460 target_lexer = plain_text_lexer
455
461
456 if not patch['stats']['binary']:
462 if not patch['stats']['binary']:
457 node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
463 node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
458 hl_mode = node_hl_mode or self.highlight_mode
464 hl_mode = node_hl_mode or self.highlight_mode
459
465
460 if hl_mode == self.HL_REAL:
466 if hl_mode == self.HL_REAL:
461 if (source_filename and patch['operation'] in ('D', 'M')
467 if (source_filename and patch['operation'] in ('D', 'M')
462 and source_filename not in self.source_nodes):
468 and source_filename not in self.source_nodes):
463 self.source_nodes[source_filename] = (
469 self.source_nodes[source_filename] = (
464 self.source_node_getter(source_filename))
470 self.source_node_getter(source_filename))
465
471
466 if (target_filename and patch['operation'] in ('A', 'M')
472 if (target_filename and patch['operation'] in ('A', 'M')
467 and target_filename not in self.target_nodes):
473 and target_filename not in self.target_nodes):
468 self.target_nodes[target_filename] = (
474 self.target_nodes[target_filename] = (
469 self.target_node_getter(target_filename))
475 self.target_node_getter(target_filename))
470
476
471 elif hl_mode == self.HL_FAST:
477 elif hl_mode == self.HL_FAST:
472 source_lexer = self._get_lexer_for_filename(source_filename)
478 source_lexer = self._get_lexer_for_filename(source_filename)
473 target_lexer = self._get_lexer_for_filename(target_filename)
479 target_lexer = self._get_lexer_for_filename(target_filename)
474
480
475 source_file = self.source_nodes.get(source_filename, source_filename)
481 source_file = self.source_nodes.get(source_filename, source_filename)
476 target_file = self.target_nodes.get(target_filename, target_filename)
482 target_file = self.target_nodes.get(target_filename, target_filename)
477 raw_id_uid = ''
483 raw_id_uid = ''
478 if self.source_nodes.get(source_filename):
484 if self.source_nodes.get(source_filename):
479 raw_id_uid = self.source_nodes[source_filename].commit.raw_id
485 raw_id_uid = self.source_nodes[source_filename].commit.raw_id
480
486
481 if not raw_id_uid and self.target_nodes.get(target_filename):
487 if not raw_id_uid and self.target_nodes.get(target_filename):
482 # in case this is a new file we only have it in target
488 # in case this is a new file we only have it in target
483 raw_id_uid = self.target_nodes[target_filename].commit.raw_id
489 raw_id_uid = self.target_nodes[target_filename].commit.raw_id
484
490
485 source_filenode, target_filenode = None, None
491 source_filenode, target_filenode = None, None
486
492
487 # TODO: dan: FileNode.lexer works on the content of the file - which
493 # TODO: dan: FileNode.lexer works on the content of the file - which
488 # can be slow - issue #4289 explains a lexer clean up - which once
494 # can be slow - issue #4289 explains a lexer clean up - which once
489 # done can allow caching a lexer for a filenode to avoid the file lookup
495 # done can allow caching a lexer for a filenode to avoid the file lookup
490 if isinstance(source_file, FileNode):
496 if isinstance(source_file, FileNode):
491 source_filenode = source_file
497 source_filenode = source_file
492 #source_lexer = source_file.lexer
498 #source_lexer = source_file.lexer
493 source_lexer = self._get_lexer_for_filename(source_filename)
499 source_lexer = self._get_lexer_for_filename(source_filename)
494 source_file.lexer = source_lexer
500 source_file.lexer = source_lexer
495
501
496 if isinstance(target_file, FileNode):
502 if isinstance(target_file, FileNode):
497 target_filenode = target_file
503 target_filenode = target_file
498 #target_lexer = target_file.lexer
504 #target_lexer = target_file.lexer
499 target_lexer = self._get_lexer_for_filename(target_filename)
505 target_lexer = self._get_lexer_for_filename(target_filename)
500 target_file.lexer = target_lexer
506 target_file.lexer = target_lexer
501
507
502 source_file_path, target_file_path = None, None
508 source_file_path, target_file_path = None, None
503
509
504 if source_filename != '/dev/null':
510 if source_filename != '/dev/null':
505 source_file_path = source_filename
511 source_file_path = source_filename
506 if target_filename != '/dev/null':
512 if target_filename != '/dev/null':
507 target_file_path = target_filename
513 target_file_path = target_filename
508
514
509 source_file_type = source_lexer.name
515 source_file_type = source_lexer.name
510 target_file_type = target_lexer.name
516 target_file_type = target_lexer.name
511
517
512 filediff = AttributeDict({
518 filediff = AttributeDict({
513 'source_file_path': source_file_path,
519 'source_file_path': source_file_path,
514 'target_file_path': target_file_path,
520 'target_file_path': target_file_path,
515 'source_filenode': source_filenode,
521 'source_filenode': source_filenode,
516 'target_filenode': target_filenode,
522 'target_filenode': target_filenode,
517 'source_file_type': target_file_type,
523 'source_file_type': target_file_type,
518 'target_file_type': source_file_type,
524 'target_file_type': source_file_type,
519 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
525 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
520 'operation': patch['operation'],
526 'operation': patch['operation'],
521 'source_mode': patch['stats']['old_mode'],
527 'source_mode': patch['stats']['old_mode'],
522 'target_mode': patch['stats']['new_mode'],
528 'target_mode': patch['stats']['new_mode'],
523 'limited_diff': patch['is_limited_diff'],
529 'limited_diff': patch['is_limited_diff'],
524 'hunks': [],
530 'hunks': [],
525 'hunk_ops': None,
531 'hunk_ops': None,
526 'diffset': self,
532 'diffset': self,
527 'raw_id': raw_id_uid,
533 'raw_id': raw_id_uid,
528 })
534 })
529
535
530 file_chunks = patch['chunks'][1:]
536 file_chunks = patch['chunks'][1:]
531 for hunk in file_chunks:
537 for hunk in file_chunks:
532 hunkbit = self.parse_hunk(hunk, source_file, target_file)
538 hunkbit = self.parse_hunk(hunk, source_file, target_file)
533 hunkbit.source_file_path = source_file_path
539 hunkbit.source_file_path = source_file_path
534 hunkbit.target_file_path = target_file_path
540 hunkbit.target_file_path = target_file_path
535 filediff.hunks.append(hunkbit)
541 filediff.hunks.append(hunkbit)
536
542
537 # Simulate hunk on OPS type line which doesn't really contain any diff
543 # Simulate hunk on OPS type line which doesn't really contain any diff
538 # this allows commenting on those
544 # this allows commenting on those
539 if not file_chunks:
545 if not file_chunks:
540 actions = []
546 actions = []
541 for op_id, op_text in filediff.patch['stats']['ops'].items():
547 for op_id, op_text in filediff.patch['stats']['ops'].items():
542 if op_id == DEL_FILENODE:
548 if op_id == DEL_FILENODE:
543 actions.append(u'file was removed')
549 actions.append(u'file was removed')
544 elif op_id == BIN_FILENODE:
550 elif op_id == BIN_FILENODE:
545 actions.append(u'binary diff hidden')
551 actions.append(u'binary diff hidden')
546 else:
552 else:
547 actions.append(safe_unicode(op_text))
553 actions.append(safe_unicode(op_text))
548 action_line = u'NO CONTENT: ' + \
554 action_line = u'NO CONTENT: ' + \
549 u', '.join(actions) or u'UNDEFINED_ACTION'
555 u', '.join(actions) or u'UNDEFINED_ACTION'
550
556
551 hunk_ops = {'source_length': 0, 'source_start': 0,
557 hunk_ops = {'source_length': 0, 'source_start': 0,
552 'lines': [
558 'lines': [
553 {'new_lineno': 0, 'old_lineno': 1,
559 {'new_lineno': 0, 'old_lineno': 1,
554 'action': 'unmod-no-hl', 'line': action_line}
560 'action': 'unmod-no-hl', 'line': action_line}
555 ],
561 ],
556 'section_header': u'', 'target_start': 1, 'target_length': 1}
562 'section_header': u'', 'target_start': 1, 'target_length': 1}
557
563
558 hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
564 hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
559 hunkbit.source_file_path = source_file_path
565 hunkbit.source_file_path = source_file_path
560 hunkbit.target_file_path = target_file_path
566 hunkbit.target_file_path = target_file_path
561 filediff.hunk_ops = hunkbit
567 filediff.hunk_ops = hunkbit
562 return filediff
568 return filediff
563
569
564 def parse_hunk(self, hunk, source_file, target_file):
570 def parse_hunk(self, hunk, source_file, target_file):
565 result = AttributeDict(dict(
571 result = AttributeDict(dict(
566 source_start=hunk['source_start'],
572 source_start=hunk['source_start'],
567 source_length=hunk['source_length'],
573 source_length=hunk['source_length'],
568 target_start=hunk['target_start'],
574 target_start=hunk['target_start'],
569 target_length=hunk['target_length'],
575 target_length=hunk['target_length'],
570 section_header=hunk['section_header'],
576 section_header=hunk['section_header'],
571 lines=[],
577 lines=[],
572 ))
578 ))
573 before, after = [], []
579 before, after = [], []
574
580
575 for line in hunk['lines']:
581 for line in hunk['lines']:
576 if line['action'] in ['unmod', 'unmod-no-hl']:
582 if line['action'] in ['unmod', 'unmod-no-hl']:
577 no_hl = line['action'] == 'unmod-no-hl'
583 no_hl = line['action'] == 'unmod-no-hl'
578 result.lines.extend(
584 result.lines.extend(
579 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
585 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
580 after.append(line)
586 after.append(line)
581 before.append(line)
587 before.append(line)
582 elif line['action'] == 'add':
588 elif line['action'] == 'add':
583 after.append(line)
589 after.append(line)
584 elif line['action'] == 'del':
590 elif line['action'] == 'del':
585 before.append(line)
591 before.append(line)
586 elif line['action'] == 'old-no-nl':
592 elif line['action'] == 'old-no-nl':
587 before.append(line)
593 before.append(line)
588 elif line['action'] == 'new-no-nl':
594 elif line['action'] == 'new-no-nl':
589 after.append(line)
595 after.append(line)
590
596
591 all_actions = [x['action'] for x in after] + [x['action'] for x in before]
597 all_actions = [x['action'] for x in after] + [x['action'] for x in before]
592 no_hl = {x for x in all_actions} == {'unmod-no-hl'}
598 no_hl = {x for x in all_actions} == {'unmod-no-hl'}
593 result.lines.extend(
599 result.lines.extend(
594 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
600 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
595 # NOTE(marcink): we must keep list() call here so we can cache the result...
601 # NOTE(marcink): we must keep list() call here so we can cache the result...
596 result.unified = list(self.as_unified(result.lines))
602 result.unified = list(self.as_unified(result.lines))
597 result.sideside = result.lines
603 result.sideside = result.lines
598
604
599 return result
605 return result
600
606
601 def parse_lines(self, before_lines, after_lines, source_file, target_file,
607 def parse_lines(self, before_lines, after_lines, source_file, target_file,
602 no_hl=False):
608 no_hl=False):
603 # TODO: dan: investigate doing the diff comparison and fast highlighting
609 # TODO: dan: investigate doing the diff comparison and fast highlighting
604 # on the entire before and after buffered block lines rather than by
610 # on the entire before and after buffered block lines rather than by
605 # line, this means we can get better 'fast' highlighting if the context
611 # line, this means we can get better 'fast' highlighting if the context
606 # allows it - eg.
612 # allows it - eg.
607 # line 4: """
613 # line 4: """
608 # line 5: this gets highlighted as a string
614 # line 5: this gets highlighted as a string
609 # line 6: """
615 # line 6: """
610
616
611 lines = []
617 lines = []
612
618
613 before_newline = AttributeDict()
619 before_newline = AttributeDict()
614 after_newline = AttributeDict()
620 after_newline = AttributeDict()
615 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
621 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
616 before_newline_line = before_lines.pop(-1)
622 before_newline_line = before_lines.pop(-1)
617 before_newline.content = '\n {}'.format(
623 before_newline.content = '\n {}'.format(
618 render_tokenstream(
624 render_tokenstream(
619 [(x[0], '', x[1])
625 [(x[0], '', x[1])
620 for x in [('nonl', before_newline_line['line'])]]))
626 for x in [('nonl', before_newline_line['line'])]]))
621
627
622 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
628 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
623 after_newline_line = after_lines.pop(-1)
629 after_newline_line = after_lines.pop(-1)
624 after_newline.content = '\n {}'.format(
630 after_newline.content = '\n {}'.format(
625 render_tokenstream(
631 render_tokenstream(
626 [(x[0], '', x[1])
632 [(x[0], '', x[1])
627 for x in [('nonl', after_newline_line['line'])]]))
633 for x in [('nonl', after_newline_line['line'])]]))
628
634
629 while before_lines or after_lines:
635 while before_lines or after_lines:
630 before, after = None, None
636 before, after = None, None
631 before_tokens, after_tokens = None, None
637 before_tokens, after_tokens = None, None
632
638
633 if before_lines:
639 if before_lines:
634 before = before_lines.pop(0)
640 before = before_lines.pop(0)
635 if after_lines:
641 if after_lines:
636 after = after_lines.pop(0)
642 after = after_lines.pop(0)
637
643
638 original = AttributeDict()
644 original = AttributeDict()
639 modified = AttributeDict()
645 modified = AttributeDict()
640
646
641 if before:
647 if before:
642 if before['action'] == 'old-no-nl':
648 if before['action'] == 'old-no-nl':
643 before_tokens = [('nonl', before['line'])]
649 before_tokens = [('nonl', before['line'])]
644 else:
650 else:
645 before_tokens = self.get_line_tokens(
651 before_tokens = self.get_line_tokens(
646 line_text=before['line'], line_number=before['old_lineno'],
652 line_text=before['line'], line_number=before['old_lineno'],
647 input_file=source_file, no_hl=no_hl)
653 input_file=source_file, no_hl=no_hl)
648 original.lineno = before['old_lineno']
654 original.lineno = before['old_lineno']
649 original.content = before['line']
655 original.content = before['line']
650 original.action = self.action_to_op(before['action'])
656 original.action = self.action_to_op(before['action'])
651
657
652 original.get_comment_args = (
658 original.get_comment_args = (
653 source_file, 'o', before['old_lineno'])
659 source_file, 'o', before['old_lineno'])
654
660
655 if after:
661 if after:
656 if after['action'] == 'new-no-nl':
662 if after['action'] == 'new-no-nl':
657 after_tokens = [('nonl', after['line'])]
663 after_tokens = [('nonl', after['line'])]
658 else:
664 else:
659 after_tokens = self.get_line_tokens(
665 after_tokens = self.get_line_tokens(
660 line_text=after['line'], line_number=after['new_lineno'],
666 line_text=after['line'], line_number=after['new_lineno'],
661 input_file=target_file, no_hl=no_hl)
667 input_file=target_file, no_hl=no_hl)
662 modified.lineno = after['new_lineno']
668 modified.lineno = after['new_lineno']
663 modified.content = after['line']
669 modified.content = after['line']
664 modified.action = self.action_to_op(after['action'])
670 modified.action = self.action_to_op(after['action'])
665
671
666 modified.get_comment_args = (target_file, 'n', after['new_lineno'])
672 modified.get_comment_args = (target_file, 'n', after['new_lineno'])
667
673
668 # diff the lines
674 # diff the lines
669 if before_tokens and after_tokens:
675 if before_tokens and after_tokens:
670 o_tokens, m_tokens, similarity = tokens_diff(
676 o_tokens, m_tokens, similarity = tokens_diff(
671 before_tokens, after_tokens)
677 before_tokens, after_tokens)
672 original.content = render_tokenstream(o_tokens)
678 original.content = render_tokenstream(o_tokens)
673 modified.content = render_tokenstream(m_tokens)
679 modified.content = render_tokenstream(m_tokens)
674 elif before_tokens:
680 elif before_tokens:
675 original.content = render_tokenstream(
681 original.content = render_tokenstream(
676 [(x[0], '', x[1]) for x in before_tokens])
682 [(x[0], '', x[1]) for x in before_tokens])
677 elif after_tokens:
683 elif after_tokens:
678 modified.content = render_tokenstream(
684 modified.content = render_tokenstream(
679 [(x[0], '', x[1]) for x in after_tokens])
685 [(x[0], '', x[1]) for x in after_tokens])
680
686
681 if not before_lines and before_newline:
687 if not before_lines and before_newline:
682 original.content += before_newline.content
688 original.content += before_newline.content
683 before_newline = None
689 before_newline = None
684 if not after_lines and after_newline:
690 if not after_lines and after_newline:
685 modified.content += after_newline.content
691 modified.content += after_newline.content
686 after_newline = None
692 after_newline = None
687
693
688 lines.append(AttributeDict({
694 lines.append(AttributeDict({
689 'original': original,
695 'original': original,
690 'modified': modified,
696 'modified': modified,
691 }))
697 }))
692
698
693 return lines
699 return lines
694
700
695 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
701 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
696 filenode = None
702 filenode = None
697 filename = None
703 filename = None
698
704
699 if isinstance(input_file, compat.string_types):
705 if isinstance(input_file, compat.string_types):
700 filename = input_file
706 filename = input_file
701 elif isinstance(input_file, FileNode):
707 elif isinstance(input_file, FileNode):
702 filenode = input_file
708 filenode = input_file
703 filename = input_file.unicode_path
709 filename = input_file.unicode_path
704
710
705 hl_mode = self.HL_NONE if no_hl else self.highlight_mode
711 hl_mode = self.HL_NONE if no_hl else self.highlight_mode
706 if hl_mode == self.HL_REAL and filenode:
712 if hl_mode == self.HL_REAL and filenode:
707 lexer = self._get_lexer_for_filename(filename)
713 lexer = self._get_lexer_for_filename(filename)
708 file_size_allowed = input_file.size < self.max_file_size_limit
714 file_size_allowed = input_file.size < self.max_file_size_limit
709 if line_number and file_size_allowed:
715 if line_number and file_size_allowed:
710 return self.get_tokenized_filenode_line(
716 return self.get_tokenized_filenode_line(
711 input_file, line_number, lexer)
717 input_file, line_number, lexer)
712
718
713 if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
719 if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
714 lexer = self._get_lexer_for_filename(filename)
720 lexer = self._get_lexer_for_filename(filename)
715 return list(tokenize_string(line_text, lexer))
721 return list(tokenize_string(line_text, lexer))
716
722
717 return list(tokenize_string(line_text, plain_text_lexer))
723 return list(tokenize_string(line_text, plain_text_lexer))
718
724
719 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
725 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
720
726
721 if filenode not in self.highlighted_filenodes:
727 if filenode not in self.highlighted_filenodes:
722 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
728 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
723 self.highlighted_filenodes[filenode] = tokenized_lines
729 self.highlighted_filenodes[filenode] = tokenized_lines
724 return self.highlighted_filenodes[filenode][line_number - 1]
730
731 try:
732 return self.highlighted_filenodes[filenode][line_number - 1]
733 except Exception:
734 return [('', u'rhodecode diff rendering error')]
725
735
726 def action_to_op(self, action):
736 def action_to_op(self, action):
727 return {
737 return {
728 'add': '+',
738 'add': '+',
729 'del': '-',
739 'del': '-',
730 'unmod': ' ',
740 'unmod': ' ',
731 'unmod-no-hl': ' ',
741 'unmod-no-hl': ' ',
732 'old-no-nl': ' ',
742 'old-no-nl': ' ',
733 'new-no-nl': ' ',
743 'new-no-nl': ' ',
734 }.get(action, action)
744 }.get(action, action)
735
745
736 def as_unified(self, lines):
746 def as_unified(self, lines):
737 """
747 """
738 Return a generator that yields the lines of a diff in unified order
748 Return a generator that yields the lines of a diff in unified order
739 """
749 """
740 def generator():
750 def generator():
741 buf = []
751 buf = []
742 for line in lines:
752 for line in lines:
743
753
744 if buf and not line.original or line.original.action == ' ':
754 if buf and not line.original or line.original.action == ' ':
745 for b in buf:
755 for b in buf:
746 yield b
756 yield b
747 buf = []
757 buf = []
748
758
749 if line.original:
759 if line.original:
750 if line.original.action == ' ':
760 if line.original.action == ' ':
751 yield (line.original.lineno, line.modified.lineno,
761 yield (line.original.lineno, line.modified.lineno,
752 line.original.action, line.original.content,
762 line.original.action, line.original.content,
753 line.original.get_comment_args)
763 line.original.get_comment_args)
754 continue
764 continue
755
765
756 if line.original.action == '-':
766 if line.original.action == '-':
757 yield (line.original.lineno, None,
767 yield (line.original.lineno, None,
758 line.original.action, line.original.content,
768 line.original.action, line.original.content,
759 line.original.get_comment_args)
769 line.original.get_comment_args)
760
770
761 if line.modified.action == '+':
771 if line.modified.action == '+':
762 buf.append((
772 buf.append((
763 None, line.modified.lineno,
773 None, line.modified.lineno,
764 line.modified.action, line.modified.content,
774 line.modified.action, line.modified.content,
765 line.modified.get_comment_args))
775 line.modified.get_comment_args))
766 continue
776 continue
767
777
768 if line.modified:
778 if line.modified:
769 yield (None, line.modified.lineno,
779 yield (None, line.modified.lineno,
770 line.modified.action, line.modified.content,
780 line.modified.action, line.modified.content,
771 line.modified.get_comment_args)
781 line.modified.get_comment_args)
772
782
773 for b in buf:
783 for b in buf:
774 yield b
784 yield b
775
785
776 return generator()
786 return generator()
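# Illustrative sketch (hypothetical wiring, not part of the original module):
# roughly how a caller might construct a DiffSet, using diffset_node_getter
# to lazily resolve file nodes from two commits. 'example-repo' and the
# commit arguments are placeholders for real vcs backend objects.
def _diffset_construction_example(source_commit, target_commit):
    return DiffSet(
        highlight_mode=DiffSet.HL_FAST,
        repo_name='example-repo',
        source_node_getter=diffset_node_getter(source_commit),
        target_node_getter=diffset_node_getter(target_commit),
    )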
@@ -1,312 +1,335 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2016-2019 RhodeCode GmbH
3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import pytest
21 import pytest
22 from pygments.lexers import get_lexer_by_name
22 from pygments.lexers import get_lexer_by_name
23
23
24 from rhodecode.tests import no_newline_id_generator
24 from rhodecode.tests import no_newline_id_generator
25 from rhodecode.lib.codeblocks import (
25 from rhodecode.lib.codeblocks import (
26 tokenize_string, split_token_stream, rollup_tokenstream,
26 tokenize_string, split_token_stream, rollup_tokenstream,
27 render_tokenstream)
27 render_tokenstream)
28
28
29
29
30 class TestTokenizeString(object):
30 class TestTokenizeString(object):
31
31
32 python_code = '''
32 python_code = '''
33 import this
33 import this
34
34
35 var = 6
35 var = 6
36 print("this")
36 print("this")
37
37
38 '''
38 '''
39
39
40 def test_tokenize_as_python(self):
40 def test_tokenize_as_python(self):
41 lexer = get_lexer_by_name('python')
41 lexer = get_lexer_by_name('python')
42 tokens = list(tokenize_string(self.python_code, lexer))
42 tokens = list(tokenize_string(self.python_code, lexer))
43
43
44 assert tokens == [
44 assert tokens == [
45 ('', u'\n'),
45 ('', u'\n'),
46 ('', u' '),
46 ('', u' '),
47 ('kn', u'import'),
47 ('kn', u'import'),
48 ('', u' '),
48 ('', u' '),
49 ('nn', u'this'),
49 ('nn', u'this'),
50 ('', u'\n'),
50 ('', u'\n'),
51 ('', u'\n'),
51 ('', u'\n'),
52 ('', u' '),
52 ('', u' '),
53 ('n', u'var'),
53 ('n', u'var'),
54 ('', u' '),
54 ('', u' '),
55 ('o', u'='),
55 ('o', u'='),
56 ('', u' '),
56 ('', u' '),
57 ('mi', u'6'),
57 ('mi', u'6'),
58 ('', u'\n'),
58 ('', u'\n'),
59 ('', u' '),
59 ('', u' '),
60 ('k', u'print'),
60 ('k', u'print'),
61 ('p', u'('),
61 ('p', u'('),
62 ('s2', u'"'),
62 ('s2', u'"'),
63 ('s2', u'this'),
63 ('s2', u'this'),
64 ('s2', u'"'),
64 ('s2', u'"'),
65 ('p', u')'),
65 ('p', u')'),
66 ('', u'\n'),
66 ('', u'\n'),
67 ('', u'\n'),
67 ('', u'\n'),
68 ('', u' ')
68 ('', u' ')
69 ]
69 ]
70
70
71 def test_tokenize_as_text(self):
71 def test_tokenize_as_text(self):
72 lexer = get_lexer_by_name('text')
72 lexer = get_lexer_by_name('text')
73 tokens = list(tokenize_string(self.python_code, lexer))
73 tokens = list(tokenize_string(self.python_code, lexer))
74
74
75 assert tokens == [
75 assert tokens == [
76 ('',
76 ('',
77 u'\n import this\n\n var = 6\n print("this")\n\n ')
77 u'\n import this\n\n var = 6\n print("this")\n\n ')
78 ]
78 ]
79
79
80
80
81 class TestSplitTokenStream(object):
81 class TestSplitTokenStream(object):
82
82
83 def test_split_token_stream(self):
83 def test_split_token_stream(self):
84 lines = list(split_token_stream(
84 tokens = [('type1', 'some\ntext'), ('type2', 'more\n')]
85 [('type1', 'some\ntext'), ('type2', 'more\n')]))
85 content = [x + y for x, y in tokens]
86 lines = list(split_token_stream(tokens, content))
86
87
87 assert lines == [
88 assert lines == [
88 [('type1', u'some')],
89 [('type1', u'some')],
89 [('type1', u'text'), ('type2', u'more')],
90 [('type1', u'text'), ('type2', u'more')],
90 [('type2', u'')],
91 [('type2', u'')],
91 ]
92 ]
92
93
93 def test_split_token_stream_single(self):
94 def test_split_token_stream_single(self):
94 lines = list(split_token_stream(
95 tokens = [('type1', '\n')]
95 [('type1', '\n')]))
96 content = [x + y for x, y in tokens]
96
97 lines = list(split_token_stream(tokens, content))
97 assert lines == [
98 assert lines == [
98 [('type1', '')],
99 [('type1', '')],
99 [('type1', '')],
100 [('type1', '')],
100 ]
101 ]
101
102
102 def test_split_token_stream_single_repeat(self):
103 def test_split_token_stream_single_repeat(self):
103 lines = list(split_token_stream(
104 tokens = [('type1', '\n\n\n')]
104 [('type1', '\n\n\n')]))
105 content = [x + y for x, y in tokens]
105
106 lines = list(split_token_stream(tokens, content))
106 assert lines == [
107 assert lines == [
107 [('type1', '')],
108 [('type1', '')],
108 [('type1', '')],
109 [('type1', '')],
109 [('type1', '')],
110 [('type1', '')],
110 [('type1', '')],
111 [('type1', '')],
111 ]
112 ]
112
113
113 def test_split_token_stream_multiple_repeat(self):
114 def test_split_token_stream_multiple_repeat(self):
114 lines = list(split_token_stream(
115 tokens = [('type1', '\n\n'), ('type2', '\n\n')]
115 [('type1', '\n\n'), ('type2', '\n\n')]))
116 content = [x + y for x, y in tokens]
116
117
118 lines = list(split_token_stream(tokens, content))
117 assert lines == [
119 assert lines == [
118 [('type1', '')],
120 [('type1', '')],
119 [('type1', '')],
121 [('type1', '')],
120 [('type1', ''), ('type2', '')],
122 [('type1', ''), ('type2', '')],
121 [('type2', '')],
123 [('type2', '')],
122 [('type2', '')],
124 [('type2', '')],
123 ]
125 ]
124
126
127 def test_no_tokens_by_content(self):
128 tokens = []
129 content = u'\ufeff'
130 lines = list(split_token_stream(tokens, content))
131 assert lines == [
132 [('', content)],
133 ]
134
135 def test_no_tokens_by_valid_content(self):
136 from pygments.lexers.css import CssLexer
137 content = u'\ufeff table.dataTable'
138 tokens = tokenize_string(content, CssLexer())
139
140 lines = list(split_token_stream(tokens, content))
141 assert lines == [
142 [('', u' '),
143 ('nt', u'table'),
144 ('p', u'.'),
145 ('nc', u'dataTable')],
146 ]
147
125
148
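The reworked tests above exercise a two-argument split_token_stream(tokens, content): the token stream is split on newlines into one token list per source line, and the raw content serves as a fallback when the lexer produced no tokens at all (for example a BOM-only file). A rough sketch of that contract (names are mine; the CssLexer BOM case suggests the real code reconciles tokens against content more thoroughly than this does):

    def split_token_stream_sketch(tokens, content):
        # Yield one list of (css_class, text) tokens per line of input.
        line_buffer = []
        for token_class, token_text in tokens:
            parts = token_text.split('\n')
            for part in parts[:-1]:
                line_buffer.append((token_class, part))
                yield line_buffer
                line_buffer = []
            line_buffer.append((token_class, parts[-1]))

        if line_buffer:
            yield line_buffer
        elif content:
            # The lexer produced nothing (e.g. only a BOM): fall back to
            # emitting the raw content as a single untyped token.
            yield [('', content)]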
126 class TestRollupTokens(object):
149 class TestRollupTokens(object):
127
150
128 @pytest.mark.parametrize('tokenstream,output', [
151 @pytest.mark.parametrize('tokenstream,output', [
129 ([],
152 ([],
130 []),
153 []),
131 ([('A', 'hell'), ('A', 'o')], [
154 ([('A', 'hell'), ('A', 'o')], [
132 ('A', [
155 ('A', [
133 ('', 'hello')]),
156 ('', 'hello')]),
134 ]),
157 ]),
135 ([('A', 'hell'), ('B', 'o')], [
158 ([('A', 'hell'), ('B', 'o')], [
136 ('A', [
159 ('A', [
137 ('', 'hell')]),
160 ('', 'hell')]),
138 ('B', [
161 ('B', [
139 ('', 'o')]),
162 ('', 'o')]),
140 ]),
163 ]),
141 ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
164 ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
142 ('A', [
165 ('A', [
143 ('', 'hello')]),
166 ('', 'hello')]),
144 ('B', [
167 ('B', [
145 ('', ' ')]),
168 ('', ' ')]),
146 ('A', [
169 ('A', [
147 ('', 'there')]),
170 ('', 'there')]),
148 ]),
171 ]),
149 ])
172 ])
150 def test_rollup_tokenstream_without_ops(self, tokenstream, output):
173 def test_rollup_tokenstream_without_ops(self, tokenstream, output):
151 assert list(rollup_tokenstream(tokenstream)) == output
174 assert list(rollup_tokenstream(tokenstream)) == output
152
175
153 @pytest.mark.parametrize('tokenstream,output', [
176 @pytest.mark.parametrize('tokenstream,output', [
154 ([],
177 ([],
155 []),
178 []),
156 ([('A', '', 'hell'), ('A', '', 'o')], [
179 ([('A', '', 'hell'), ('A', '', 'o')], [
157 ('A', [
180 ('A', [
158 ('', 'hello')]),
181 ('', 'hello')]),
159 ]),
182 ]),
160 ([('A', '', 'hell'), ('B', '', 'o')], [
183 ([('A', '', 'hell'), ('B', '', 'o')], [
161 ('A', [
184 ('A', [
162 ('', 'hell')]),
185 ('', 'hell')]),
163 ('B', [
186 ('B', [
164 ('', 'o')]),
187 ('', 'o')]),
165 ]),
188 ]),
166 ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
189 ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
167 ('A', [
190 ('A', [
168 ('', 'h')]),
191 ('', 'h')]),
169 ('B', [
192 ('B', [
170 ('', 'e')]),
193 ('', 'e')]),
171 ('C', [
194 ('C', [
172 ('', 'y')]),
195 ('', 'y')]),
173 ]),
196 ]),
174 ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
197 ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
175 ('A', [
198 ('A', [
176 ('', 'he')]),
199 ('', 'he')]),
177 ('C', [
200 ('C', [
178 ('', 'y')]),
201 ('', 'y')]),
179 ]),
202 ]),
180 ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
203 ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
181 ('A', [
204 ('A', [
182 ('ins', 'he')
205 ('ins', 'he')
183 ]),
206 ]),
184 ]),
207 ]),
185 ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
208 ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
186 ('A', [
209 ('A', [
187 ('ins', 'h'),
210 ('ins', 'h'),
188 ('del', 'e')
211 ('del', 'e')
189 ]),
212 ]),
190 ]),
213 ]),
191 ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
214 ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
192 ('A', [
215 ('A', [
193 ('ins', 'h'),
216 ('ins', 'h'),
194 ]),
217 ]),
195 ('B', [
218 ('B', [
196 ('del', 'ey'),
219 ('del', 'ey'),
197 ]),
220 ]),
198 ]),
221 ]),
199 ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
222 ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
200 ('A', [
223 ('A', [
201 ('ins', 'h'),
224 ('ins', 'h'),
202 ('del', 'e'),
225 ('del', 'e'),
203 ]),
226 ]),
204 ('B', [
227 ('B', [
205 ('del', 'y'),
228 ('del', 'y'),
206 ]),
229 ]),
207 ]),
230 ]),
208 ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
231 ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
209 ('A', [
232 ('A', [
210 ('', 'some'),
233 ('', 'some'),
211 ('ins', 'new'),
234 ('ins', 'new'),
212 ('', 'name'),
235 ('', 'name'),
213 ]),
236 ]),
214 ]),
237 ]),
215 ])
238 ])
216 def test_rollup_tokenstream_with_ops(self, tokenstream, output):
239 def test_rollup_tokenstream_with_ops(self, tokenstream, output):
217 assert list(rollup_tokenstream(tokenstream)) == output
240 assert list(rollup_tokenstream(tokenstream)) == output
218
241
219
242
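Read together, the parametrized cases above describe rollup_tokenstream as grouping consecutive tokens by class and, within each class group, merging consecutive entries that share the same op; plain (class, text) pairs behave as if their op were ''. A compact sketch of that behaviour using itertools.groupby (the _sketch name is mine; this illustrates the asserted contract, not the shipped code):

    from itertools import groupby
    from operator import itemgetter

    def rollup_tokenstream_sketch(tokenstream):
        # Normalise 2-tuples (class, text) to 3-tuples (class, op, text).
        stream = ((t[0], '', t[1]) if len(t) == 2 else t for t in tokenstream)
        rolled = []
        for token_class, class_group in groupby(stream, key=itemgetter(0)):
            ops = []
            for op_tag, op_group in groupby(class_group, key=itemgetter(1)):
                # Merge consecutive same-op texts, e.g. 'hell' + 'o' -> 'hello'.
                ops.append((op_tag, ''.join(item[2] for item in op_group)))
            rolled.append((token_class, ops))
        return rolled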
220 class TestRenderTokenStream(object):
243 class TestRenderTokenStream(object):
221
244
222 @pytest.mark.parametrize('tokenstream,output', [
245 @pytest.mark.parametrize('tokenstream,output', [
223 (
246 (
224 [],
247 [],
225 '',
248 '',
226 ),
249 ),
227 (
250 (
228 [('', '', u'')],
251 [('', '', u'')],
229 '<span></span>',
252 '<span></span>',
230 ),
253 ),
231 (
254 (
232 [('', '', u'text')],
255 [('', '', u'text')],
233 '<span>text</span>',
256 '<span>text</span>',
234 ),
257 ),
235 (
258 (
236 [('A', '', u'')],
259 [('A', '', u'')],
237 '<span class="A"></span>',
260 '<span class="A"></span>',
238 ),
261 ),
239 (
262 (
240 [('A', '', u'hello')],
263 [('A', '', u'hello')],
241 '<span class="A">hello</span>',
264 '<span class="A">hello</span>',
242 ),
265 ),
243 (
266 (
244 [('A', '', u'hel'), ('A', '', u'lo')],
267 [('A', '', u'hel'), ('A', '', u'lo')],
245 '<span class="A">hello</span>',
268 '<span class="A">hello</span>',
246 ),
269 ),
247 (
270 (
248 [('A', '', u'two\n'), ('A', '', u'lines')],
271 [('A', '', u'two\n'), ('A', '', u'lines')],
249 '<span class="A">two\nlines</span>',
272 '<span class="A">two\nlines</span>',
250 ),
273 ),
251 (
274 (
252 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
275 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
253 '<span class="A">\nthree\nlines</span>',
276 '<span class="A">\nthree\nlines</span>',
254 ),
277 ),
255 (
278 (
256 [('', '', u'\n'), ('A', '', u'line')],
279 [('', '', u'\n'), ('A', '', u'line')],
257 '<span>\n</span><span class="A">line</span>',
280 '<span>\n</span><span class="A">line</span>',
258 ),
281 ),
259 (
282 (
260 [('', 'ins', u'\n'), ('A', '', u'line')],
283 [('', 'ins', u'\n'), ('A', '', u'line')],
261 '<span><ins>\n</ins></span><span class="A">line</span>',
284 '<span><ins>\n</ins></span><span class="A">line</span>',
262 ),
285 ),
263 (
286 (
264 [('A', '', u'hel'), ('A', 'ins', u'lo')],
287 [('A', '', u'hel'), ('A', 'ins', u'lo')],
265 '<span class="A">hel<ins>lo</ins></span>',
288 '<span class="A">hel<ins>lo</ins></span>',
266 ),
289 ),
267 (
290 (
268 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
291 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
269 '<span class="A">hel<ins>lo</ins></span>',
292 '<span class="A">hel<ins>lo</ins></span>',
270 ),
293 ),
271 (
294 (
272 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
295 [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
273 '<span class="A">hel<ins>l</ins><del>o</del></span>',
296 '<span class="A">hel<ins>l</ins><del>o</del></span>',
274 ),
297 ),
275 (
298 (
276 [('A', '', u'hel'), ('B', '', u'lo')],
299 [('A', '', u'hel'), ('B', '', u'lo')],
277 '<span class="A">hel</span><span class="B">lo</span>',
300 '<span class="A">hel</span><span class="B">lo</span>',
278 ),
301 ),
279 (
302 (
280 [('A', '', u'hel'), ('B', 'ins', u'lo')],
303 [('A', '', u'hel'), ('B', 'ins', u'lo')],
281 '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
304 '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
282 ),
305 ),
283 ], ids=no_newline_id_generator)
306 ], ids=no_newline_id_generator)
284 def test_render_tokenstream_with_ops(self, tokenstream, output):
307 def test_render_tokenstream_with_ops(self, tokenstream, output):
285 html = render_tokenstream(tokenstream)
308 html = render_tokenstream(tokenstream)
286 assert html == output
309 assert html == output
287
310
288 @pytest.mark.parametrize('tokenstream,output', [
311 @pytest.mark.parametrize('tokenstream,output', [
289 (
312 (
290 [('A', u'hel'), ('A', u'lo')],
313 [('A', u'hel'), ('A', u'lo')],
291 '<span class="A">hello</span>',
314 '<span class="A">hello</span>',
292 ),
315 ),
293 (
316 (
294 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
317 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
295 '<span class="A">hello</span>',
318 '<span class="A">hello</span>',
296 ),
319 ),
297 (
320 (
298 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
321 [('A', u'hel'), ('A', u'l'), ('A', u'o')],
299 '<span class="A">hello</span>',
322 '<span class="A">hello</span>',
300 ),
323 ),
301 (
324 (
302 [('A', u'hel'), ('B', u'lo')],
325 [('A', u'hel'), ('B', u'lo')],
303 '<span class="A">hel</span><span class="B">lo</span>',
326 '<span class="A">hel</span><span class="B">lo</span>',
304 ),
327 ),
305 (
328 (
306 [('A', u'hel'), ('B', u'lo')],
329 [('A', u'hel'), ('B', u'lo')],
307 '<span class="A">hel</span><span class="B">lo</span>',
330 '<span class="A">hel</span><span class="B">lo</span>',
308 ),
331 ),
309 ])
332 ])
310 def test_render_tokenstream_without_ops(self, tokenstream, output):
333 def test_render_tokenstream_without_ops(self, tokenstream, output):
311 html = render_tokenstream(tokenstream)
334 html = render_tokenstream(tokenstream)
312 assert html == output
335 assert html == output
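The rendering cases pin down the HTML shape: one <span> per rolled-up class group (no class attribute when the class is empty), with 'ins' and 'del' ops wrapped in <ins>/<del> tags inside the span. A sketch of a renderer that satisfies these cases, reusing rollup_tokenstream_sketch from the sketch above; a production renderer would presumably also HTML-escape the text, which these cases do not exercise:

    def render_tokenstream_sketch(tokenstream):
        # Assumes rollup_tokenstream_sketch (defined in the earlier sketch)
        # is in scope; text is emitted unescaped for brevity.
        html = []
        for token_class, op_texts in rollup_tokenstream_sketch(tokenstream):
            html.append('<span class="%s">' % token_class if token_class else '<span>')
            for op_tag, text in op_texts:
                if op_tag:
                    html.append('<%s>%s</%s>' % (op_tag, text, op_tag))
                else:
                    html.append(text)
            html.append('</span>')
        return ''.join(html)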