diffs: report errors on diff rendering.
milka - r4576:99f87073 stable

@@ -1,793 +1,794 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2020 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import logging
import difflib
from itertools import groupby

from pygments import lex
from pygments.formatters.html import _get_ttype_class as pygment_token_class
from pygments.lexers.special import TextLexer, Token
from pygments.lexers import get_lexer_by_name
from pyramid import compat

from rhodecode.lib.helpers import (
    get_lexer_for_filenode, html_escape, get_custom_lexer)
from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
from rhodecode.lib.vcs.nodes import FileNode
from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
from rhodecode.lib.diff_match_patch import diff_match_patch
from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE


plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)


log = logging.getLogger(__name__)


def filenode_as_lines_tokens(filenode, lexer=None):
    org_lexer = lexer
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, org_lexer)
    content = filenode.content
    tokens = tokenize_string(content, lexer)
    lines = split_token_stream(tokens, content)
    rv = list(lines)
    return rv


def tokenize_string(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer
    ensuring all original new lines and whitespace are preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False

    if isinstance(lexer, TextLexer):
        lexed = [(Token.Text, content)]
    else:
        lexed = lex(content, lexer)

    for token_type, token_text in lexed:
        yield pygment_token_class(token_type), token_text
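
# For example, tokenize_string() yields (css_class, text) pairs; lexing
# "x = 1\n" with a Python lexer gives pairs roughly like ('n', 'x'), ('', ' '),
# ('o', '='), ('', ' '), ('mi', '1'), ('', '\n'). The short class names come
# from pygments' HTML formatter and whitespace/newlines are kept verbatim.
# (Illustrative note; the exact classes depend on the pygments version.)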


def split_token_stream(tokens, content):
    """
    Take a list of (TokenType, text) tuples and split them by newlines,
    yielding one list of tokens per line

    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
    [(TEXT, 'some')],
    [(TEXT, 'text'), (TEXT, 'more')],
    [(TEXT, '')]
    """

    token_buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            token_buffer.append((token_class, part))
            yield token_buffer
            token_buffer = []

        token_buffer.append((token_class, parts[-1]))

    if token_buffer:
        yield token_buffer
    elif content:
        # this is a special case, we have the content, but tokenization didn't produce
        # any results. This can happen if known file extensions like .css have some bogus
        # unicode content without any newline characters
        yield [(pygment_token_class(Token.Text), content)]


def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotations => lines, if no annotation
    is found, it will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # cache commit_getter lookups

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in enumerate(filenode_as_lines_tokens(filenode), 1))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )


def render_tokenstream(tokenstream):
    result = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

        if token_class:
            result.append(u'<span class="%s">' % token_class)
        else:
            result.append(u'<span>')

        for op_tag, token_text in token_ops_texts:

            if op_tag:
                result.append(u'<%s>' % op_tag)

            # NOTE(marcink): in some cases of mixed encodings, we might run into
            # trouble in html_escape; in that case we force token_text to unicode,
            # which ensures "correct" data at the cost of a possibly lossy rendering
            try:
                escaped_text = html_escape(token_text)
            except TypeError:
                escaped_text = html_escape(safe_unicode(token_text))

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

            result.append(escaped_text)

            if op_tag:
                result.append(u'</%s>' % op_tag)

        result.append(u'</span>')

    html = ''.join(result)
    return html
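
# Illustrative example (not from the original source): a rolled-up stream such as
#   [('k', '', 'def'), ('', '', ' '), ('nf', 'ins', 'foo')]
# renders to
#   <span class="k">def</span><span> </span><span class="nf"><ins>foo</ins></span>
# i.e. one <span> per class group, with <ins>/<del> wrapping only the changed ops.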


def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html, e.g. for a token stream:

    <span class="A"><ins>he</ins>llo</span>
    vs
    <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])

    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]

    """
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            text_buffer = []
            for t_class, t_op, t_text in token_text_list:
                text_buffer.append(t_text)
            ops.append((token_op, ''.join(text_buffer)))
        result.append((token_class, ops))
    return result


def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    """

    old_tokens_result = []
    new_tokens_result = []

    similarity = difflib.SequenceMatcher(None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            old_char_tokens = []
            new_char_tokens = []
            for token_class, token_text in old_tokens[o1:o2]:
                for char in token_text:
                    old_char_tokens.append((token_class, char))

            for token_class, token_text in new_tokens[n1:n2]:
                for char in token_text:
                    new_char_tokens.append((token_class, char))

            old_string = ''.join([token_text for
                                  token_class, token_text in old_char_tokens])
            new_string = ''.join([token_text for
                                  token_class, token_text in new_char_tokens])

            char_sequence = difflib.SequenceMatcher(
                None, old_string, new_string)
            copcodes = char_sequence.get_opcodes()
            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                a, b = 0, 0
                for op, rep in reps:
                    l = len(rep)
                    if op == 0:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += l
                        b += l
                    elif op == -1:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += l
                    elif op == 1:
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += l
            else:
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
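
# Illustrative example (not from the original source):
#   tokens_diff([('', 'hello')], [('', 'help')])
# returns per-character 3-tuples roughly like
#   old: ('', '', 'h'), ('', '', 'e'), ('', '', 'l'), ('', 'del', 'l'), ('', 'del', 'o')
#   new: ('', '', 'h'), ('', '', 'e'), ('', '', 'l'), ('', 'ins', 'p')
# together with the similarity ratio (~0.67) as the third return value.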


def diffset_node_getter(commit):
    def get_node(fname):
        try:
            return commit.get_node(fname)
        except NodeDoesNotExistError:
            return None

    return get_node


class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest

    def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                 source_repo_name=None,
                 source_node_getter=lambda filename: None,
                 target_repo_name=None,
                 target_node_getter=lambda filename: None,
                 source_nodes=None, target_nodes=None,
                 # files over this size will use fast highlighting
                 max_file_size_limit=150 * 1024,
                 ):

        self.highlight_mode = highlight_mode
        self.highlighted_filenodes = {}
        self.source_node_getter = source_node_getter
        self.target_node_getter = target_node_getter
        self.source_nodes = source_nodes or {}
        self.target_nodes = target_nodes or {}
        self.repo_name = repo_name
        self.target_repo_name = target_repo_name or repo_name
        self.source_repo_name = source_repo_name or repo_name
        self.max_file_size_limit = max_file_size_limit
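
    # A minimal wiring sketch (illustrative; `source_commit`/`target_commit`
    # are assumed commit objects, not names defined in this module):
    #
    #   diffset = DiffSet(
    #       highlight_mode=DiffSet.HL_REAL,
    #       repo_name='my-repo',
    #       source_node_getter=diffset_node_getter(source_commit),
    #       target_node_getter=diffset_node_getter(target_commit),
    #   )
    #   rendered = diffset.render_patchset(patchset, source_ref, target_ref)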

    def render_patchset(self, patchset, source_ref=None, target_ref=None):
        diffset = AttributeDict(dict(
            lines_added=0,
            lines_deleted=0,
            changed_files=0,
            files=[],
            file_stats={},
            limited_diff=isinstance(patchset, LimitedDiffContainer),
            repo_name=self.repo_name,
            target_repo_name=self.target_repo_name,
            source_repo_name=self.source_repo_name,
            source_ref=source_ref,
            target_ref=target_ref,
        ))
        for patch in patchset:
            diffset.file_stats[patch['filename']] = patch['stats']
            filediff = self.render_patch(patch)
            filediff.diffset = StrictAttributeDict(dict(
                source_ref=diffset.source_ref,
                target_ref=diffset.target_ref,
                repo_name=diffset.repo_name,
                source_repo_name=diffset.source_repo_name,
                target_repo_name=diffset.target_repo_name,
            ))
            diffset.files.append(filediff)
            diffset.changed_files += 1
            if not patch['stats']['binary']:
                diffset.lines_added += patch['stats']['added']
                diffset.lines_deleted += patch['stats']['deleted']

        return diffset

    _lexer_cache = {}

    def _get_lexer_for_filename(self, filename, filenode=None):
        # cached because we might need to call it twice for source/target
        if filename not in self._lexer_cache:
            if filenode:
                lexer = filenode.lexer
                extension = filenode.extension
            else:
                lexer = FileNode.get_lexer(filename=filename)
                extension = filename.split('.')[-1]

            lexer = get_custom_lexer(extension) or lexer
            self._lexer_cache[filename] = lexer
        return self._lexer_cache[filename]

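    # render_patch() below resolves lexers and FileNodes according to the
    # configured highlight mode, builds a `filediff` AttributeDict, parses each
    # hunk, and for patches without chunks synthesizes a pseudo "ops" hunk so
    # the operation (file removed, binary diff hidden, ...) can still be shown
    # and commented on.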
    def render_patch(self, patch):
        log.debug('rendering diff for %r', patch['filename'])

        source_filename = patch['original_filename']
        target_filename = patch['filename']

        source_lexer = plain_text_lexer
        target_lexer = plain_text_lexer

        if not patch['stats']['binary']:
            node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
            hl_mode = node_hl_mode or self.highlight_mode

            if hl_mode == self.HL_REAL:
                if (source_filename and patch['operation'] in ('D', 'M')
                        and source_filename not in self.source_nodes):
                    self.source_nodes[source_filename] = (
                        self.source_node_getter(source_filename))

                if (target_filename and patch['operation'] in ('A', 'M')
                        and target_filename not in self.target_nodes):
                    self.target_nodes[target_filename] = (
                        self.target_node_getter(target_filename))

            elif hl_mode == self.HL_FAST:
                source_lexer = self._get_lexer_for_filename(source_filename)
                target_lexer = self._get_lexer_for_filename(target_filename)

        source_file = self.source_nodes.get(source_filename, source_filename)
        target_file = self.target_nodes.get(target_filename, target_filename)
        raw_id_uid = ''
        if self.source_nodes.get(source_filename):
            raw_id_uid = self.source_nodes[source_filename].commit.raw_id

        if not raw_id_uid and self.target_nodes.get(target_filename):
            # in case this is a new file we only have it in target
            raw_id_uid = self.target_nodes[target_filename].commit.raw_id

        source_filenode, target_filenode = None, None

        # TODO: dan: FileNode.lexer works on the content of the file - which
        # can be slow - issue #4289 explains a lexer clean up - which once
        # done can allow caching a lexer for a filenode to avoid the file lookup
        if isinstance(source_file, FileNode):
            source_filenode = source_file
            #source_lexer = source_file.lexer
            source_lexer = self._get_lexer_for_filename(source_filename)
            source_file.lexer = source_lexer

        if isinstance(target_file, FileNode):
            target_filenode = target_file
            #target_lexer = target_file.lexer
            target_lexer = self._get_lexer_for_filename(target_filename)
            target_file.lexer = target_lexer

        source_file_path, target_file_path = None, None

        if source_filename != '/dev/null':
            source_file_path = source_filename
        if target_filename != '/dev/null':
            target_file_path = target_filename

        source_file_type = source_lexer.name
        target_file_type = target_lexer.name

        filediff = AttributeDict({
            'source_file_path': source_file_path,
            'target_file_path': target_file_path,
            'source_filenode': source_filenode,
            'target_filenode': target_filenode,
            'source_file_type': target_file_type,
            'target_file_type': source_file_type,
            'patch': {'filename': patch['filename'], 'stats': patch['stats']},
            'operation': patch['operation'],
            'source_mode': patch['stats']['old_mode'],
            'target_mode': patch['stats']['new_mode'],
            'limited_diff': patch['is_limited_diff'],
            'hunks': [],
            'hunk_ops': None,
            'diffset': self,
            'raw_id': raw_id_uid,
        })

        file_chunks = patch['chunks'][1:]
        for i, hunk in enumerate(file_chunks, 1):
            hunkbit = self.parse_hunk(hunk, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            hunkbit.index = i
            filediff.hunks.append(hunkbit)

        # Simulate hunk on OPS type line which doesn't really contain any diff
        # this allows commenting on those
        if not file_chunks:
            actions = []
            for op_id, op_text in filediff.patch['stats']['ops'].items():
                if op_id == DEL_FILENODE:
                    actions.append(u'file was removed')
                elif op_id == BIN_FILENODE:
                    actions.append(u'binary diff hidden')
                else:
                    actions.append(safe_unicode(op_text))
            action_line = u'NO CONTENT: ' + \
                          u', '.join(actions) or u'UNDEFINED_ACTION'

            hunk_ops = {'source_length': 0, 'source_start': 0,
                        'lines': [
                            {'new_lineno': 0, 'old_lineno': 1,
                             'action': 'unmod-no-hl', 'line': action_line}
                        ],
                        'section_header': u'', 'target_start': 1, 'target_length': 1}

            hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunk_ops = hunkbit
        return filediff

    def parse_hunk(self, hunk, source_file, target_file):
        result = AttributeDict(dict(
            source_start=hunk['source_start'],
            source_length=hunk['source_length'],
            target_start=hunk['target_start'],
            target_length=hunk['target_length'],
            section_header=hunk['section_header'],
            lines=[],
        ))
        before, after = [], []

        for line in hunk['lines']:
            if line['action'] in ['unmod', 'unmod-no-hl']:
                no_hl = line['action'] == 'unmod-no-hl'
                result.lines.extend(
                    self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
                after.append(line)
                before.append(line)
            elif line['action'] == 'add':
                after.append(line)
            elif line['action'] == 'del':
                before.append(line)
            elif line['action'] == 'old-no-nl':
                before.append(line)
            elif line['action'] == 'new-no-nl':
                after.append(line)

        all_actions = [x['action'] for x in after] + [x['action'] for x in before]
        no_hl = {x for x in all_actions} == {'unmod-no-hl'}
        result.lines.extend(
            self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
        # NOTE(marcink): we must keep list() call here so we can cache the result...
        result.unified = list(self.as_unified(result.lines))
        result.sideside = result.lines

        return result
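
    # Each parsed hunk carries `lines` (rows with paired original/modified
    # data, also exposed as `sideside`) and `unified` (the same rows flattened
    # by as_unified() into unified-diff order).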

    def parse_lines(self, before_lines, after_lines, source_file, target_file,
                    no_hl=False):
        # TODO: dan: investigate doing the diff comparison and fast highlighting
        # on the entire before and after buffered block lines rather than by
        # line, this means we can get better 'fast' highlighting if the context
        # allows it - eg.
        # line 4: """
        # line 5: this gets highlighted as a string
        # line 6: """

        lines = []

        before_newline = AttributeDict()
        after_newline = AttributeDict()
        if before_lines and before_lines[-1]['action'] == 'old-no-nl':
            before_newline_line = before_lines.pop(-1)
            before_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', before_newline_line['line'])]]))

        if after_lines and after_lines[-1]['action'] == 'new-no-nl':
            after_newline_line = after_lines.pop(-1)
            after_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', after_newline_line['line'])]]))

        while before_lines or after_lines:
            before, after = None, None
            before_tokens, after_tokens = None, None

            if before_lines:
                before = before_lines.pop(0)
            if after_lines:
                after = after_lines.pop(0)

            original = AttributeDict()
            modified = AttributeDict()

            if before:
                if before['action'] == 'old-no-nl':
                    before_tokens = [('nonl', before['line'])]
                else:
                    before_tokens = self.get_line_tokens(
                        line_text=before['line'], line_number=before['old_lineno'],
                        input_file=source_file, no_hl=no_hl)
                original.lineno = before['old_lineno']
                original.content = before['line']
                original.action = self.action_to_op(before['action'])

                original.get_comment_args = (
                    source_file, 'o', before['old_lineno'])

            if after:
                if after['action'] == 'new-no-nl':
                    after_tokens = [('nonl', after['line'])]
                else:
                    after_tokens = self.get_line_tokens(
                        line_text=after['line'], line_number=after['new_lineno'],
                        input_file=target_file, no_hl=no_hl)
                modified.lineno = after['new_lineno']
                modified.content = after['line']
                modified.action = self.action_to_op(after['action'])

                modified.get_comment_args = (target_file, 'n', after['new_lineno'])

            # diff the lines
            if before_tokens and after_tokens:
                o_tokens, m_tokens, similarity = tokens_diff(
                    before_tokens, after_tokens)
                original.content = render_tokenstream(o_tokens)
                modified.content = render_tokenstream(m_tokens)
            elif before_tokens:
                original.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in before_tokens])
            elif after_tokens:
                modified.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in after_tokens])

            if not before_lines and before_newline:
                original.content += before_newline.content
                before_newline = None
            if not after_lines and after_newline:
                modified.content += after_newline.content
                after_newline = None

            lines.append(AttributeDict({
                'original': original,
                'modified': modified,
            }))

        return lines
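
    # Each entry produced by parse_lines() is an AttributeDict with `original`
    # and `modified` sides; a side, when present, holds `lineno`, the rendered
    # `content`, the `action` op (+/-/space) and `get_comment_args`.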

    def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
        filenode = None
        filename = None

        if isinstance(input_file, compat.string_types):
            filename = input_file
        elif isinstance(input_file, FileNode):
            filenode = input_file
            filename = input_file.unicode_path

        hl_mode = self.HL_NONE if no_hl else self.highlight_mode
        if hl_mode == self.HL_REAL and filenode:
            lexer = self._get_lexer_for_filename(filename)
            file_size_allowed = input_file.size < self.max_file_size_limit
            if line_number and file_size_allowed:
                return self.get_tokenized_filenode_line(
                    input_file, line_number, lexer)

        if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
            lexer = self._get_lexer_for_filename(filename)
            return list(tokenize_string(line_text, lexer))

        return list(tokenize_string(line_text, plain_text_lexer))
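
    # get_line_tokens() falls back gradually: HL_REAL with a FileNode below the
    # size limit tokenizes the whole file once (cached), HL_REAL/HL_FAST with
    # just a filename lexes only the given line, and anything else (or no_hl)
    # uses the plain text lexer.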

    def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):

        if filenode not in self.highlighted_filenodes:
            tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
            self.highlighted_filenodes[filenode] = tokenized_lines

        try:
            return self.highlighted_filenodes[filenode][line_number - 1]
        except Exception:
            log.exception('diff rendering error')
            return [('', u'rhodecode diff rendering error')]
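
    # The log.exception() call above is the single line added by this commit
    # ("diffs: report errors on diff rendering."): tokenization failures are
    # now logged with a traceback, while the placeholder token keeps the diff
    # page rendering.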

    def action_to_op(self, action):
        return {
            'add': '+',
            'del': '-',
            'unmod': ' ',
            'unmod-no-hl': ' ',
            'old-no-nl': ' ',
            'new-no-nl': ' ',
        }.get(action, action)

    def as_unified(self, lines):
        """
        Return a generator that yields the lines of a diff in unified order
        """
        def generator():
            buf = []
            for line in lines:

                if buf and not line.original or line.original.action == ' ':
                    for b in buf:
                        yield b
                    buf = []

                if line.original:
                    if line.original.action == ' ':
                        yield (line.original.lineno, line.modified.lineno,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)
                        continue

                    if line.original.action == '-':
                        yield (line.original.lineno, None,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)

                    if line.modified.action == '+':
                        buf.append((
                            None, line.modified.lineno,
                            line.modified.action, line.modified.content,
                            line.modified.get_comment_args))
                        continue

                if line.modified:
                    yield (None, line.modified.lineno,
                           line.modified.action, line.modified.content,
                           line.modified.get_comment_args)

            for b in buf:
                yield b

        return generator()
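
    # as_unified() yields 5-tuples of
    #   (old_lineno, new_lineno, op, rendered_content, get_comment_args),
    # emitting removed lines before the buffered added lines of each changed block.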