diffs: fixed a potential problem with diff display.
milka
r4592:8ef51af3 stable
@@ -1,794 +1,798 @@
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2020 RhodeCode GmbH
3 # Copyright (C) 2011-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 import difflib
23 from itertools import groupby
23 from itertools import groupby
24
24
25 from pygments import lex
25 from pygments import lex
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from pygments.lexers.special import TextLexer, Token
27 from pygments.lexers.special import TextLexer, Token
28 from pygments.lexers import get_lexer_by_name
28 from pygments.lexers import get_lexer_by_name
29 from pyramid import compat
29 from pyramid import compat
30
30
31 from rhodecode.lib.helpers import (
31 from rhodecode.lib.helpers import (
32 get_lexer_for_filenode, html_escape, get_custom_lexer)
32 get_lexer_for_filenode, html_escape, get_custom_lexer)
33 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
33 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
34 from rhodecode.lib.vcs.nodes import FileNode
34 from rhodecode.lib.vcs.nodes import FileNode
35 from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
35 from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
36 from rhodecode.lib.diff_match_patch import diff_match_patch
36 from rhodecode.lib.diff_match_patch import diff_match_patch
37 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
37 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
38
38
39
39
40 plain_text_lexer = get_lexer_by_name(
40 plain_text_lexer = get_lexer_by_name(
41 'text', stripall=False, stripnl=False, ensurenl=False)
41 'text', stripall=False, stripnl=False, ensurenl=False)
42
42
43
43
44 log = logging.getLogger(__name__)
44 log = logging.getLogger(__name__)
45
45
46
46
47 def filenode_as_lines_tokens(filenode, lexer=None):
47 def filenode_as_lines_tokens(filenode, lexer=None):
48 org_lexer = lexer
48 org_lexer = lexer
49 lexer = lexer or get_lexer_for_filenode(filenode)
49 lexer = lexer or get_lexer_for_filenode(filenode)
50 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
50 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
51 lexer, filenode, org_lexer)
51 lexer, filenode, org_lexer)
52 content = filenode.content
52 content = filenode.content
53 tokens = tokenize_string(content, lexer)
53 tokens = tokenize_string(content, lexer)
54 lines = split_token_stream(tokens, content)
54 lines = split_token_stream(tokens, content)
55 rv = list(lines)
55 rv = list(lines)
56 return rv
56 return rv
57
57
58
58
59 def tokenize_string(content, lexer):
59 def tokenize_string(content, lexer):
60 """
60 """
61 Use pygments to tokenize some content based on a lexer
61 Use pygments to tokenize some content based on a lexer
62 ensuring all original newlines and whitespace are preserved
62 ensuring all original newlines and whitespace are preserved
63 """
63 """
64
64
65 lexer.stripall = False
65 lexer.stripall = False
66 lexer.stripnl = False
66 lexer.stripnl = False
67 lexer.ensurenl = False
67 lexer.ensurenl = False
68
68
69 if isinstance(lexer, TextLexer):
69 if isinstance(lexer, TextLexer):
70 lexed = [(Token.Text, content)]
70 lexed = [(Token.Text, content)]
71 else:
71 else:
72 lexed = lex(content, lexer)
72 lexed = lex(content, lexer)
73
73
74 for token_type, token_text in lexed:
74 for token_type, token_text in lexed:
75 yield pygment_token_class(token_type), token_text
75 yield pygment_token_class(token_type), token_text
76
76
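A minimal usage sketch of the tokenizer above (illustrative only; it assumes pygments is importable, and the exact token classes printed depend on the pygments version and lexer):

from pygments.lexers import get_lexer_by_name

lexer = get_lexer_by_name('python', stripall=False, stripnl=False, ensurenl=False)
for token_class, token_text in tokenize_string(u'x = 1\n', lexer):
    # token_class is the short pygments CSS class ('' for plain text),
    # token_text is the raw slice, whitespace and newlines included,
    # e.g. ('n', u'x'), ('', u' '), ('o', u'='), ('', u' '), ('mi', u'1'), ('', u'\n')
    print(token_class, repr(token_text))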
77
77
78 def split_token_stream(tokens, content):
78 def split_token_stream(tokens, content):
79 """
79 """
80 Take a list of (TokenType, text) tuples and split them into per-line lists of tokens, splitting on newlines
80 Take a list of (TokenType, text) tuples and split them into per-line lists of tokens, splitting on newlines
81
81
82 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')], content)
82 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')], content)
83 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
83 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
84 [(TEXT, '')]]
84 [(TEXT, '')]]
85 """
85 """
86
86
87 token_buffer = []
87 token_buffer = []
88 for token_class, token_text in tokens:
88 for token_class, token_text in tokens:
89 parts = token_text.split('\n')
89 parts = token_text.split('\n')
90 for part in parts[:-1]:
90 for part in parts[:-1]:
91 token_buffer.append((token_class, part))
91 token_buffer.append((token_class, part))
92 yield token_buffer
92 yield token_buffer
93 token_buffer = []
93 token_buffer = []
94
94
95 token_buffer.append((token_class, parts[-1]))
95 token_buffer.append((token_class, parts[-1]))
96
96
97 if token_buffer:
97 if token_buffer:
98 yield token_buffer
98 yield token_buffer
99 elif content:
99 elif content:
100 # this is a special case: we have the content, but tokenization didn't produce
100 # this is a special case: we have the content, but tokenization didn't produce
101 # any results. This can happen if known file extensions like .css have some bogus
101 # any results. This can happen if known file extensions like .css have some bogus
102 # unicode content without any newline characters
102 # unicode content without any newline characters
103 yield [(pygment_token_class(Token.Text), content)]
103 yield [(pygment_token_class(Token.Text), content)]
104
104
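A small hedged example of the splitter above, using plain token classes so it runs without pygments; the content argument only matters when the token stream is empty:

tokens = [('', u'some\ntext'), ('', u'more\n')]
lines = list(split_token_stream(tokens, u'some\ntextmore\n'))
# each yielded item is one source line worth of tokens:
# [[('', u'some')], [('', u'text'), ('', u'more')], [('', u'')]]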
105
105
106 def filenode_as_annotated_lines_tokens(filenode):
106 def filenode_as_annotated_lines_tokens(filenode):
107 """
107 """
108 Take a file node and return a list of annotation => lines pairs; if no annotation
108 Take a file node and return a list of annotation => lines pairs; if no annotation
109 is found for a group of lines, the annotation will be None.
109 is found for a group of lines, the annotation will be None.
110
110
111 eg:
111 eg:
112
112
113 [
113 [
114 (annotation1, [
114 (annotation1, [
115 (1, line1_tokens_list),
115 (1, line1_tokens_list),
116 (2, line2_tokens_list),
116 (2, line2_tokens_list),
117 ]),
117 ]),
118 (annotation2, [
118 (annotation2, [
119 (3, line1_tokens_list),
119 (3, line1_tokens_list),
120 ]),
120 ]),
121 (None, [
121 (None, [
122 (4, line1_tokens_list),
122 (4, line1_tokens_list),
123 ]),
123 ]),
124 (annotation1, [
124 (annotation1, [
125 (5, line1_tokens_list),
125 (5, line1_tokens_list),
126 (6, line2_tokens_list),
126 (6, line2_tokens_list),
127 ])
127 ])
128 ]
128 ]
129 """
129 """
130
130
131 commit_cache = {} # cache commit_getter lookups
131 commit_cache = {} # cache commit_getter lookups
132
132
133 def _get_annotation(commit_id, commit_getter):
133 def _get_annotation(commit_id, commit_getter):
134 if commit_id not in commit_cache:
134 if commit_id not in commit_cache:
135 commit_cache[commit_id] = commit_getter()
135 commit_cache[commit_id] = commit_getter()
136 return commit_cache[commit_id]
136 return commit_cache[commit_id]
137
137
138 annotation_lookup = {
138 annotation_lookup = {
139 line_no: _get_annotation(commit_id, commit_getter)
139 line_no: _get_annotation(commit_id, commit_getter)
140 for line_no, commit_id, commit_getter, line_content
140 for line_no, commit_id, commit_getter, line_content
141 in filenode.annotate
141 in filenode.annotate
142 }
142 }
143
143
144 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
144 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
145 for line_no, tokens
145 for line_no, tokens
146 in enumerate(filenode_as_lines_tokens(filenode), 1))
146 in enumerate(filenode_as_lines_tokens(filenode), 1))
147
147
148 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
148 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
149
149
150 for annotation, group in grouped_annotations_lines:
150 for annotation, group in grouped_annotations_lines:
151 yield (
151 yield (
152 annotation, [(line_no, tokens)
152 annotation, [(line_no, tokens)
153 for (_, line_no, tokens) in group]
153 for (_, line_no, tokens) in group]
154 )
154 )
155
155
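The grouping above leans on itertools.groupby collapsing consecutive lines that share an annotation; a standalone sketch of that idea with placeholder data (no real FileNode or commit objects):

from itertools import groupby

annotated = [('commit-1', 1, ['tokens']), ('commit-1', 2, ['tokens']), (None, 3, ['tokens'])]
grouped = [(annotation, [(line_no, tokens) for _, line_no, tokens in group])
           for annotation, group in groupby(annotated, lambda x: x[0])]
# [('commit-1', [(1, ['tokens']), (2, ['tokens'])]), (None, [(3, ['tokens'])])]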
156
156
157 def render_tokenstream(tokenstream):
157 def render_tokenstream(tokenstream):
158 result = []
158 result = []
159 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
159 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
160
160
161 if token_class:
161 if token_class:
162 result.append(u'<span class="%s">' % token_class)
162 result.append(u'<span class="%s">' % token_class)
163 else:
163 else:
164 result.append(u'<span>')
164 result.append(u'<span>')
165
165
166 for op_tag, token_text in token_ops_texts:
166 for op_tag, token_text in token_ops_texts:
167
167
168 if op_tag:
168 if op_tag:
169 result.append(u'<%s>' % op_tag)
169 result.append(u'<%s>' % op_tag)
170
170
171 # NOTE(marcink): in some cases of mixed encodings, we might run into
171 # NOTE(marcink): in some cases of mixed encodings, we might run into
172 # troubles in html_escape; in this case we force token_text to unicode, which
172 # troubles in html_escape; in this case we force token_text to unicode, which
173 # ensures "correct" data, even at the cost of rendering fidelity
173 # ensures "correct" data, even at the cost of rendering fidelity
174 try:
174 try:
175 escaped_text = html_escape(token_text)
175 escaped_text = html_escape(token_text)
176 except TypeError:
176 except TypeError:
177 escaped_text = html_escape(safe_unicode(token_text))
177 escaped_text = html_escape(safe_unicode(token_text))
178
178
179 # TODO: dan: investigate showing hidden characters like space/nl/tab
179 # TODO: dan: investigate showing hidden characters like space/nl/tab
180 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
180 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
181 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
181 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
182 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
182 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
183
183
184 result.append(escaped_text)
184 result.append(escaped_text)
185
185
186 if op_tag:
186 if op_tag:
187 result.append(u'</%s>' % op_tag)
187 result.append(u'</%s>' % op_tag)
188
188
189 result.append(u'</span>')
189 result.append(u'</span>')
190
190
191 html = ''.join(result)
191 html = ''.join(result)
192 return html
192 return html
193
193
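An illustrative call of the renderer above, fed the (class, op, text) triples that tokens_diff produces; the expected HTML is shown as a comment:

html = render_tokenstream([('k', '', u'def'), ('', 'ins', u' new')])
# u'<span class="k">def</span><span><ins> new</ins></span>'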
194
194
195 def rollup_tokenstream(tokenstream):
195 def rollup_tokenstream(tokenstream):
196 """
196 """
197 Group a token stream of the format:
197 Group a token stream of the format:
198
198
199 ('class', 'op', 'text')
199 ('class', 'op', 'text')
200 or
200 or
201 ('class', 'text')
201 ('class', 'text')
202
202
203 into
203 into
204
204
205 [('class1',
205 [('class1',
206 [('op1', 'text'),
206 [('op1', 'text'),
207 ('op2', 'text')]),
207 ('op2', 'text')]),
208 ('class2',
208 ('class2',
209 [('op3', 'text')])]
209 [('op3', 'text')])]
210
210
211 This is used to get the minimal tags necessary when
211 This is used to get the minimal tags necessary when
212 rendering to html, e.g. for a token stream like:
212 rendering to html, e.g. for a token stream like:
213
213
214 <span class="A"><ins>he</ins>llo</span>
214 <span class="A"><ins>he</ins>llo</span>
215 vs
215 vs
216 <span class="A"><ins>he</ins></span><span class="A">llo</span>
216 <span class="A"><ins>he</ins></span><span class="A">llo</span>
217
217
218 If a 2-tuple is passed in, the output op will be an empty string.
218 If a 2-tuple is passed in, the output op will be an empty string.
219
219
220 eg:
220 eg:
221
221
222 >>> rollup_tokenstream([('classA', '', 'h'),
222 >>> rollup_tokenstream([('classA', '', 'h'),
223 ('classA', 'del', 'ell'),
223 ('classA', 'del', 'ell'),
224 ('classA', '', 'o'),
224 ('classA', '', 'o'),
225 ('classB', '', ' '),
225 ('classB', '', ' '),
226 ('classA', '', 'the'),
226 ('classA', '', 'the'),
227 ('classA', '', 're'),
227 ('classA', '', 're'),
228 ])
228 ])
229
229
230 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
230 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
231 ('classB', [('', ' ')]),
231 ('classB', [('', ' ')]),
232 ('classA', [('', 'there')])]
232 ('classA', [('', 'there')])]
233
233
234 """
234 """
235 if tokenstream and len(tokenstream[0]) == 2:
235 if tokenstream and len(tokenstream[0]) == 2:
236 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
236 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
237
237
238 result = []
238 result = []
239 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
239 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
240 ops = []
240 ops = []
241 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
241 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
242 text_buffer = []
242 text_buffer = []
243 for t_class, t_op, t_text in token_text_list:
243 for t_class, t_op, t_text in token_text_list:
244 text_buffer.append(t_text)
244 text_buffer.append(t_text)
245 ops.append((token_op, ''.join(text_buffer)))
245 ops.append((token_op, ''.join(text_buffer)))
246 result.append((token_class, ops))
246 result.append((token_class, ops))
247 return result
247 return result
248
248
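A runnable counterpart to the docstring example above: 2-tuples are upgraded to 3-tuples with an empty op before grouping, so adjacent same-class tokens collapse into one span:

rollup_tokenstream([('classA', u'he'), ('classA', u'llo'), ('classB', u'!')])
# [('classA', [('', u'hello')]), ('classB', [('', u'!')])]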
249
249
250 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
250 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
251 """
251 """
252 Converts a list of (token_class, token_text) tuples to a list of
252 Converts a list of (token_class, token_text) tuples to a list of
253 (token_class, token_op, token_text) tuples where token_op is one of
253 (token_class, token_op, token_text) tuples where token_op is one of
254 ('ins', 'del', '')
254 ('ins', 'del', '')
255
255
256 :param old_tokens: list of (token_class, token_text) tuples of old line
256 :param old_tokens: list of (token_class, token_text) tuples of old line
257 :param new_tokens: list of (token_class, token_text) tuples of new line
257 :param new_tokens: list of (token_class, token_text) tuples of new line
258 :param use_diff_match_patch: boolean, will use Google's diff-match-patch
258 :param use_diff_match_patch: boolean, will use Google's diff-match-patch
259 library, which has options to 'smooth' out the character-by-character
259 library, which has options to 'smooth' out the character-by-character
260 differences, making nicer ins/del blocks
260 differences, making nicer ins/del blocks
261 """
261 """
262
262
263 old_tokens_result = []
263 old_tokens_result = []
264 new_tokens_result = []
264 new_tokens_result = []
265
265
266 similarity = difflib.SequenceMatcher(None,
266 similarity = difflib.SequenceMatcher(None,
267 ''.join(token_text for token_class, token_text in old_tokens),
267 ''.join(token_text for token_class, token_text in old_tokens),
268 ''.join(token_text for token_class, token_text in new_tokens)
268 ''.join(token_text for token_class, token_text in new_tokens)
269 ).ratio()
269 ).ratio()
270
270
271 if similarity < 0.6: # return, the blocks are too different
271 if similarity < 0.6: # return, the blocks are too different
272 for token_class, token_text in old_tokens:
272 for token_class, token_text in old_tokens:
273 old_tokens_result.append((token_class, '', token_text))
273 old_tokens_result.append((token_class, '', token_text))
274 for token_class, token_text in new_tokens:
274 for token_class, token_text in new_tokens:
275 new_tokens_result.append((token_class, '', token_text))
275 new_tokens_result.append((token_class, '', token_text))
276 return old_tokens_result, new_tokens_result, similarity
276 return old_tokens_result, new_tokens_result, similarity
277
277
278 token_sequence_matcher = difflib.SequenceMatcher(None,
278 token_sequence_matcher = difflib.SequenceMatcher(None,
279 [x[1] for x in old_tokens],
279 [x[1] for x in old_tokens],
280 [x[1] for x in new_tokens])
280 [x[1] for x in new_tokens])
281
281
282 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
282 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
283 # check the differences by token block types first to give a
283 # check the differences by token block types first to give a
284 # nicer "block" level replacement vs character diffs
284 # nicer "block" level replacement vs character diffs
285
285
286 if tag == 'equal':
286 if tag == 'equal':
287 for token_class, token_text in old_tokens[o1:o2]:
287 for token_class, token_text in old_tokens[o1:o2]:
288 old_tokens_result.append((token_class, '', token_text))
288 old_tokens_result.append((token_class, '', token_text))
289 for token_class, token_text in new_tokens[n1:n2]:
289 for token_class, token_text in new_tokens[n1:n2]:
290 new_tokens_result.append((token_class, '', token_text))
290 new_tokens_result.append((token_class, '', token_text))
291 elif tag == 'delete':
291 elif tag == 'delete':
292 for token_class, token_text in old_tokens[o1:o2]:
292 for token_class, token_text in old_tokens[o1:o2]:
293 old_tokens_result.append((token_class, 'del', token_text))
293 old_tokens_result.append((token_class, 'del', token_text))
294 elif tag == 'insert':
294 elif tag == 'insert':
295 for token_class, token_text in new_tokens[n1:n2]:
295 for token_class, token_text in new_tokens[n1:n2]:
296 new_tokens_result.append((token_class, 'ins', token_text))
296 new_tokens_result.append((token_class, 'ins', token_text))
297 elif tag == 'replace':
297 elif tag == 'replace':
298 # if same type token blocks must be replaced, do a diff on the
298 # if same type token blocks must be replaced, do a diff on the
299 # characters in the token blocks to show individual changes
299 # characters in the token blocks to show individual changes
300
300
301 old_char_tokens = []
301 old_char_tokens = []
302 new_char_tokens = []
302 new_char_tokens = []
303 for token_class, token_text in old_tokens[o1:o2]:
303 for token_class, token_text in old_tokens[o1:o2]:
304 for char in token_text:
304 for char in token_text:
305 old_char_tokens.append((token_class, char))
305 old_char_tokens.append((token_class, char))
306
306
307 for token_class, token_text in new_tokens[n1:n2]:
307 for token_class, token_text in new_tokens[n1:n2]:
308 for char in token_text:
308 for char in token_text:
309 new_char_tokens.append((token_class, char))
309 new_char_tokens.append((token_class, char))
310
310
311 old_string = ''.join([token_text for
311 old_string = ''.join([token_text for
312 token_class, token_text in old_char_tokens])
312 token_class, token_text in old_char_tokens])
313 new_string = ''.join([token_text for
313 new_string = ''.join([token_text for
314 token_class, token_text in new_char_tokens])
314 token_class, token_text in new_char_tokens])
315
315
316 char_sequence = difflib.SequenceMatcher(
316 char_sequence = difflib.SequenceMatcher(
317 None, old_string, new_string)
317 None, old_string, new_string)
318 copcodes = char_sequence.get_opcodes()
318 copcodes = char_sequence.get_opcodes()
319 obuffer, nbuffer = [], []
319 obuffer, nbuffer = [], []
320
320
321 if use_diff_match_patch:
321 if use_diff_match_patch:
322 dmp = diff_match_patch()
322 dmp = diff_match_patch()
323 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
323 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
324 reps = dmp.diff_main(old_string, new_string)
324 reps = dmp.diff_main(old_string, new_string)
325 dmp.diff_cleanupEfficiency(reps)
325 dmp.diff_cleanupEfficiency(reps)
326
326
327 a, b = 0, 0
327 a, b = 0, 0
328 for op, rep in reps:
328 for op, rep in reps:
329 l = len(rep)
329 l = len(rep)
330 if op == 0:
330 if op == 0:
331 for i, c in enumerate(rep):
331 for i, c in enumerate(rep):
332 obuffer.append((old_char_tokens[a+i][0], '', c))
332 obuffer.append((old_char_tokens[a+i][0], '', c))
333 nbuffer.append((new_char_tokens[b+i][0], '', c))
333 nbuffer.append((new_char_tokens[b+i][0], '', c))
334 a += l
334 a += l
335 b += l
335 b += l
336 elif op == -1:
336 elif op == -1:
337 for i, c in enumerate(rep):
337 for i, c in enumerate(rep):
338 obuffer.append((old_char_tokens[a+i][0], 'del', c))
338 obuffer.append((old_char_tokens[a+i][0], 'del', c))
339 a += l
339 a += l
340 elif op == 1:
340 elif op == 1:
341 for i, c in enumerate(rep):
341 for i, c in enumerate(rep):
342 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
342 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
343 b += l
343 b += l
344 else:
344 else:
345 for ctag, co1, co2, cn1, cn2 in copcodes:
345 for ctag, co1, co2, cn1, cn2 in copcodes:
346 if ctag == 'equal':
346 if ctag == 'equal':
347 for token_class, token_text in old_char_tokens[co1:co2]:
347 for token_class, token_text in old_char_tokens[co1:co2]:
348 obuffer.append((token_class, '', token_text))
348 obuffer.append((token_class, '', token_text))
349 for token_class, token_text in new_char_tokens[cn1:cn2]:
349 for token_class, token_text in new_char_tokens[cn1:cn2]:
350 nbuffer.append((token_class, '', token_text))
350 nbuffer.append((token_class, '', token_text))
351 elif ctag == 'delete':
351 elif ctag == 'delete':
352 for token_class, token_text in old_char_tokens[co1:co2]:
352 for token_class, token_text in old_char_tokens[co1:co2]:
353 obuffer.append((token_class, 'del', token_text))
353 obuffer.append((token_class, 'del', token_text))
354 elif ctag == 'insert':
354 elif ctag == 'insert':
355 for token_class, token_text in new_char_tokens[cn1:cn2]:
355 for token_class, token_text in new_char_tokens[cn1:cn2]:
356 nbuffer.append((token_class, 'ins', token_text))
356 nbuffer.append((token_class, 'ins', token_text))
357 elif ctag == 'replace':
357 elif ctag == 'replace':
358 for token_class, token_text in old_char_tokens[co1:co2]:
358 for token_class, token_text in old_char_tokens[co1:co2]:
359 obuffer.append((token_class, 'del', token_text))
359 obuffer.append((token_class, 'del', token_text))
360 for token_class, token_text in new_char_tokens[cn1:cn2]:
360 for token_class, token_text in new_char_tokens[cn1:cn2]:
361 nbuffer.append((token_class, 'ins', token_text))
361 nbuffer.append((token_class, 'ins', token_text))
362
362
363 old_tokens_result.extend(obuffer)
363 old_tokens_result.extend(obuffer)
364 new_tokens_result.extend(nbuffer)
364 new_tokens_result.extend(nbuffer)
365
365
366 return old_tokens_result, new_tokens_result, similarity
366 return old_tokens_result, new_tokens_result, similarity
367
367
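A hedged example of the token diffing above, with use_diff_match_patch disabled so only the difflib opcode path runs:

old_line = [('k', u'return'), ('', u' x')]
new_line = [('k', u'return'), ('', u' xy')]
old_res, new_res, similarity = tokens_diff(old_line, new_line, use_diff_match_patch=False)
# old_res: [('k', '', u'return'), ('', '', u' '), ('', '', u'x')]
# new_res: [('k', '', u'return'), ('', '', u' '), ('', '', u'x'), ('', 'ins', u'y')]
# similarity is ~0.94, above the 0.6 cutoff, so a character-level diff is attempted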
368
368
369 def diffset_node_getter(commit):
369 def diffset_node_getter(commit):
370 def get_node(fname):
370 def get_node(fname):
371 try:
371 try:
372 return commit.get_node(fname)
372 return commit.get_node(fname)
373 except NodeDoesNotExistError:
373 except NodeDoesNotExistError:
374 return None
374 return None
375
375
376 return get_node
376 return get_node
377
377
378
378
379 class DiffSet(object):
379 class DiffSet(object):
380 """
380 """
381 An object for parsing the diff result from diffs.DiffProcessor and
381 An object for parsing the diff result from diffs.DiffProcessor and
382 adding highlighting, side by side/unified renderings and line diffs
382 adding highlighting, side by side/unified renderings and line diffs
383 """
383 """
384
384
385 HL_REAL = 'REAL' # highlights using original file, slow
385 HL_REAL = 'REAL' # highlights using original file, slow
386 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
386 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
387 # in the case of multiline code
387 # in the case of multiline code
388 HL_NONE = 'NONE' # no highlighting, fastest
388 HL_NONE = 'NONE' # no highlighting, fastest
389
389
390 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
390 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
391 source_repo_name=None,
391 source_repo_name=None,
392 source_node_getter=lambda filename: None,
392 source_node_getter=lambda filename: None,
393 target_repo_name=None,
393 target_repo_name=None,
394 target_node_getter=lambda filename: None,
394 target_node_getter=lambda filename: None,
395 source_nodes=None, target_nodes=None,
395 source_nodes=None, target_nodes=None,
396 # files over this size will use fast highlighting
396 # files over this size will use fast highlighting
397 max_file_size_limit=150 * 1024,
397 max_file_size_limit=150 * 1024,
398 ):
398 ):
399
399
400 self.highlight_mode = highlight_mode
400 self.highlight_mode = highlight_mode
401 self.highlighted_filenodes = {}
401 self.highlighted_filenodes = {
402 'before': {},
403 'after': {}
404 }
402 self.source_node_getter = source_node_getter
405 self.source_node_getter = source_node_getter
403 self.target_node_getter = target_node_getter
406 self.target_node_getter = target_node_getter
404 self.source_nodes = source_nodes or {}
407 self.source_nodes = source_nodes or {}
405 self.target_nodes = target_nodes or {}
408 self.target_nodes = target_nodes or {}
406 self.repo_name = repo_name
409 self.repo_name = repo_name
407 self.target_repo_name = target_repo_name or repo_name
410 self.target_repo_name = target_repo_name or repo_name
408 self.source_repo_name = source_repo_name or repo_name
411 self.source_repo_name = source_repo_name or repo_name
409 self.max_file_size_limit = max_file_size_limit
412 self.max_file_size_limit = max_file_size_limit
410
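A hedged construction sketch for the class above; the repo name, commit objects and patchset are placeholders, where patchset stands for the parsed result of diffs.DiffProcessor as the class docstring describes. Note that highlighted_filenodes is now keyed per diff side ('before'/'after'), so the two versions of a modified file no longer share cache entries:

diffset = DiffSet(
    highlight_mode=DiffSet.HL_REAL,
    repo_name='my-repo',                                     # placeholder
    source_node_getter=diffset_node_getter(source_commit),   # placeholder commit objects
    target_node_getter=diffset_node_getter(target_commit),
)
rendered = diffset.render_patchset(patchset, source_ref='deadbeef', target_ref='cafebabe')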
413
411 def render_patchset(self, patchset, source_ref=None, target_ref=None):
414 def render_patchset(self, patchset, source_ref=None, target_ref=None):
412 diffset = AttributeDict(dict(
415 diffset = AttributeDict(dict(
413 lines_added=0,
416 lines_added=0,
414 lines_deleted=0,
417 lines_deleted=0,
415 changed_files=0,
418 changed_files=0,
416 files=[],
419 files=[],
417 file_stats={},
420 file_stats={},
418 limited_diff=isinstance(patchset, LimitedDiffContainer),
421 limited_diff=isinstance(patchset, LimitedDiffContainer),
419 repo_name=self.repo_name,
422 repo_name=self.repo_name,
420 target_repo_name=self.target_repo_name,
423 target_repo_name=self.target_repo_name,
421 source_repo_name=self.source_repo_name,
424 source_repo_name=self.source_repo_name,
422 source_ref=source_ref,
425 source_ref=source_ref,
423 target_ref=target_ref,
426 target_ref=target_ref,
424 ))
427 ))
425 for patch in patchset:
428 for patch in patchset:
426 diffset.file_stats[patch['filename']] = patch['stats']
429 diffset.file_stats[patch['filename']] = patch['stats']
427 filediff = self.render_patch(patch)
430 filediff = self.render_patch(patch)
428 filediff.diffset = StrictAttributeDict(dict(
431 filediff.diffset = StrictAttributeDict(dict(
429 source_ref=diffset.source_ref,
432 source_ref=diffset.source_ref,
430 target_ref=diffset.target_ref,
433 target_ref=diffset.target_ref,
431 repo_name=diffset.repo_name,
434 repo_name=diffset.repo_name,
432 source_repo_name=diffset.source_repo_name,
435 source_repo_name=diffset.source_repo_name,
433 target_repo_name=diffset.target_repo_name,
436 target_repo_name=diffset.target_repo_name,
434 ))
437 ))
435 diffset.files.append(filediff)
438 diffset.files.append(filediff)
436 diffset.changed_files += 1
439 diffset.changed_files += 1
437 if not patch['stats']['binary']:
440 if not patch['stats']['binary']:
438 diffset.lines_added += patch['stats']['added']
441 diffset.lines_added += patch['stats']['added']
439 diffset.lines_deleted += patch['stats']['deleted']
442 diffset.lines_deleted += patch['stats']['deleted']
440
443
441 return diffset
444 return diffset
442
445
443 _lexer_cache = {}
446 _lexer_cache = {}
444
447
445 def _get_lexer_for_filename(self, filename, filenode=None):
448 def _get_lexer_for_filename(self, filename, filenode=None):
446 # cached because we might need to call it twice for source/target
449 # cached because we might need to call it twice for source/target
447 if filename not in self._lexer_cache:
450 if filename not in self._lexer_cache:
448 if filenode:
451 if filenode:
449 lexer = filenode.lexer
452 lexer = filenode.lexer
450 extension = filenode.extension
453 extension = filenode.extension
451 else:
454 else:
452 lexer = FileNode.get_lexer(filename=filename)
455 lexer = FileNode.get_lexer(filename=filename)
453 extension = filename.split('.')[-1]
456 extension = filename.split('.')[-1]
454
457
455 lexer = get_custom_lexer(extension) or lexer
458 lexer = get_custom_lexer(extension) or lexer
456 self._lexer_cache[filename] = lexer
459 self._lexer_cache[filename] = lexer
457 return self._lexer_cache[filename]
460 return self._lexer_cache[filename]
458
461
459 def render_patch(self, patch):
462 def render_patch(self, patch):
460 log.debug('rendering diff for %r', patch['filename'])
463 log.debug('rendering diff for %r', patch['filename'])
461
464
462 source_filename = patch['original_filename']
465 source_filename = patch['original_filename']
463 target_filename = patch['filename']
466 target_filename = patch['filename']
464
467
465 source_lexer = plain_text_lexer
468 source_lexer = plain_text_lexer
466 target_lexer = plain_text_lexer
469 target_lexer = plain_text_lexer
467
470
468 if not patch['stats']['binary']:
471 if not patch['stats']['binary']:
469 node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
472 node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
470 hl_mode = node_hl_mode or self.highlight_mode
473 hl_mode = node_hl_mode or self.highlight_mode
471
474
472 if hl_mode == self.HL_REAL:
475 if hl_mode == self.HL_REAL:
473 if (source_filename and patch['operation'] in ('D', 'M')
476 if (source_filename and patch['operation'] in ('D', 'M')
474 and source_filename not in self.source_nodes):
477 and source_filename not in self.source_nodes):
475 self.source_nodes[source_filename] = (
478 self.source_nodes[source_filename] = (
476 self.source_node_getter(source_filename))
479 self.source_node_getter(source_filename))
477
480
478 if (target_filename and patch['operation'] in ('A', 'M')
481 if (target_filename and patch['operation'] in ('A', 'M')
479 and target_filename not in self.target_nodes):
482 and target_filename not in self.target_nodes):
480 self.target_nodes[target_filename] = (
483 self.target_nodes[target_filename] = (
481 self.target_node_getter(target_filename))
484 self.target_node_getter(target_filename))
482
485
483 elif hl_mode == self.HL_FAST:
486 elif hl_mode == self.HL_FAST:
484 source_lexer = self._get_lexer_for_filename(source_filename)
487 source_lexer = self._get_lexer_for_filename(source_filename)
485 target_lexer = self._get_lexer_for_filename(target_filename)
488 target_lexer = self._get_lexer_for_filename(target_filename)
486
489
487 source_file = self.source_nodes.get(source_filename, source_filename)
490 source_file = self.source_nodes.get(source_filename, source_filename)
488 target_file = self.target_nodes.get(target_filename, target_filename)
491 target_file = self.target_nodes.get(target_filename, target_filename)
489 raw_id_uid = ''
492 raw_id_uid = ''
490 if self.source_nodes.get(source_filename):
493 if self.source_nodes.get(source_filename):
491 raw_id_uid = self.source_nodes[source_filename].commit.raw_id
494 raw_id_uid = self.source_nodes[source_filename].commit.raw_id
492
495
493 if not raw_id_uid and self.target_nodes.get(target_filename):
496 if not raw_id_uid and self.target_nodes.get(target_filename):
494 # in case this is a new file we only have it in target
497 # in case this is a new file we only have it in target
495 raw_id_uid = self.target_nodes[target_filename].commit.raw_id
498 raw_id_uid = self.target_nodes[target_filename].commit.raw_id
496
499
497 source_filenode, target_filenode = None, None
500 source_filenode, target_filenode = None, None
498
501
499 # TODO: dan: FileNode.lexer works on the content of the file - which
502 # TODO: dan: FileNode.lexer works on the content of the file - which
500 # can be slow - issue #4289 explains a lexer clean up - which once
503 # can be slow - issue #4289 explains a lexer clean up - which once
501 # done can allow caching a lexer for a filenode to avoid the file lookup
504 # done can allow caching a lexer for a filenode to avoid the file lookup
502 if isinstance(source_file, FileNode):
505 if isinstance(source_file, FileNode):
503 source_filenode = source_file
506 source_filenode = source_file
504 #source_lexer = source_file.lexer
507 #source_lexer = source_file.lexer
505 source_lexer = self._get_lexer_for_filename(source_filename)
508 source_lexer = self._get_lexer_for_filename(source_filename)
506 source_file.lexer = source_lexer
509 source_file.lexer = source_lexer
507
510
508 if isinstance(target_file, FileNode):
511 if isinstance(target_file, FileNode):
509 target_filenode = target_file
512 target_filenode = target_file
510 #target_lexer = target_file.lexer
513 #target_lexer = target_file.lexer
511 target_lexer = self._get_lexer_for_filename(target_filename)
514 target_lexer = self._get_lexer_for_filename(target_filename)
512 target_file.lexer = target_lexer
515 target_file.lexer = target_lexer
513
516
514 source_file_path, target_file_path = None, None
517 source_file_path, target_file_path = None, None
515
518
516 if source_filename != '/dev/null':
519 if source_filename != '/dev/null':
517 source_file_path = source_filename
520 source_file_path = source_filename
518 if target_filename != '/dev/null':
521 if target_filename != '/dev/null':
519 target_file_path = target_filename
522 target_file_path = target_filename
520
523
521 source_file_type = source_lexer.name
524 source_file_type = source_lexer.name
522 target_file_type = target_lexer.name
525 target_file_type = target_lexer.name
523
526
524 filediff = AttributeDict({
527 filediff = AttributeDict({
525 'source_file_path': source_file_path,
528 'source_file_path': source_file_path,
526 'target_file_path': target_file_path,
529 'target_file_path': target_file_path,
527 'source_filenode': source_filenode,
530 'source_filenode': source_filenode,
528 'target_filenode': target_filenode,
531 'target_filenode': target_filenode,
529 'source_file_type': target_file_type,
532 'source_file_type': target_file_type,
530 'target_file_type': source_file_type,
533 'target_file_type': source_file_type,
531 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
534 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
532 'operation': patch['operation'],
535 'operation': patch['operation'],
533 'source_mode': patch['stats']['old_mode'],
536 'source_mode': patch['stats']['old_mode'],
534 'target_mode': patch['stats']['new_mode'],
537 'target_mode': patch['stats']['new_mode'],
535 'limited_diff': patch['is_limited_diff'],
538 'limited_diff': patch['is_limited_diff'],
536 'hunks': [],
539 'hunks': [],
537 'hunk_ops': None,
540 'hunk_ops': None,
538 'diffset': self,
541 'diffset': self,
539 'raw_id': raw_id_uid,
542 'raw_id': raw_id_uid,
540 })
543 })
541
544
542 file_chunks = patch['chunks'][1:]
545 file_chunks = patch['chunks'][1:]
543 for i, hunk in enumerate(file_chunks, 1):
546 for i, hunk in enumerate(file_chunks, 1):
544 hunkbit = self.parse_hunk(hunk, source_file, target_file)
547 hunkbit = self.parse_hunk(hunk, source_file, target_file)
545 hunkbit.source_file_path = source_file_path
548 hunkbit.source_file_path = source_file_path
546 hunkbit.target_file_path = target_file_path
549 hunkbit.target_file_path = target_file_path
547 hunkbit.index = i
550 hunkbit.index = i
548 filediff.hunks.append(hunkbit)
551 filediff.hunks.append(hunkbit)
549
552
550 # Simulate a hunk for OPS-type lines, which don't really contain any diff;
553 # Simulate a hunk for OPS-type lines, which don't really contain any diff;
551 # this allows commenting on them
554 # this allows commenting on them
552 if not file_chunks:
555 if not file_chunks:
553 actions = []
556 actions = []
554 for op_id, op_text in filediff.patch['stats']['ops'].items():
557 for op_id, op_text in filediff.patch['stats']['ops'].items():
555 if op_id == DEL_FILENODE:
558 if op_id == DEL_FILENODE:
556 actions.append(u'file was removed')
559 actions.append(u'file was removed')
557 elif op_id == BIN_FILENODE:
560 elif op_id == BIN_FILENODE:
558 actions.append(u'binary diff hidden')
561 actions.append(u'binary diff hidden')
559 else:
562 else:
560 actions.append(safe_unicode(op_text))
563 actions.append(safe_unicode(op_text))
561 action_line = u'NO CONTENT: ' + \
564 action_line = u'NO CONTENT: ' + \
562 (u', '.join(actions) or u'UNDEFINED_ACTION')
565 (u', '.join(actions) or u'UNDEFINED_ACTION')
563
566
564 hunk_ops = {'source_length': 0, 'source_start': 0,
567 hunk_ops = {'source_length': 0, 'source_start': 0,
565 'lines': [
568 'lines': [
566 {'new_lineno': 0, 'old_lineno': 1,
569 {'new_lineno': 0, 'old_lineno': 1,
567 'action': 'unmod-no-hl', 'line': action_line}
570 'action': 'unmod-no-hl', 'line': action_line}
568 ],
571 ],
569 'section_header': u'', 'target_start': 1, 'target_length': 1}
572 'section_header': u'', 'target_start': 1, 'target_length': 1}
570
573
571 hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
574 hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
572 hunkbit.source_file_path = source_file_path
575 hunkbit.source_file_path = source_file_path
573 hunkbit.target_file_path = target_file_path
576 hunkbit.target_file_path = target_file_path
574 filediff.hunk_ops = hunkbit
577 filediff.hunk_ops = hunkbit
575 return filediff
578 return filediff
576
579
577 def parse_hunk(self, hunk, source_file, target_file):
580 def parse_hunk(self, hunk, source_file, target_file):
578 result = AttributeDict(dict(
581 result = AttributeDict(dict(
579 source_start=hunk['source_start'],
582 source_start=hunk['source_start'],
580 source_length=hunk['source_length'],
583 source_length=hunk['source_length'],
581 target_start=hunk['target_start'],
584 target_start=hunk['target_start'],
582 target_length=hunk['target_length'],
585 target_length=hunk['target_length'],
583 section_header=hunk['section_header'],
586 section_header=hunk['section_header'],
584 lines=[],
587 lines=[],
585 ))
588 ))
586 before, after = [], []
589 before, after = [], []
587
590
588 for line in hunk['lines']:
591 for line in hunk['lines']:
589 if line['action'] in ['unmod', 'unmod-no-hl']:
592 if line['action'] in ['unmod', 'unmod-no-hl']:
590 no_hl = line['action'] == 'unmod-no-hl'
593 no_hl = line['action'] == 'unmod-no-hl'
591 result.lines.extend(
594 result.lines.extend(
592 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
595 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
593 after.append(line)
596 after.append(line)
594 before.append(line)
597 before.append(line)
595 elif line['action'] == 'add':
598 elif line['action'] == 'add':
596 after.append(line)
599 after.append(line)
597 elif line['action'] == 'del':
600 elif line['action'] == 'del':
598 before.append(line)
601 before.append(line)
599 elif line['action'] == 'old-no-nl':
602 elif line['action'] == 'old-no-nl':
600 before.append(line)
603 before.append(line)
601 elif line['action'] == 'new-no-nl':
604 elif line['action'] == 'new-no-nl':
602 after.append(line)
605 after.append(line)
603
606
604 all_actions = [x['action'] for x in after] + [x['action'] for x in before]
607 all_actions = [x['action'] for x in after] + [x['action'] for x in before]
605 no_hl = {x for x in all_actions} == {'unmod-no-hl'}
608 no_hl = {x for x in all_actions} == {'unmod-no-hl'}
606 result.lines.extend(
609 result.lines.extend(
607 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
610 self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
608 # NOTE(marcink): we must keep list() call here so we can cache the result...
611 # NOTE(marcink): we must keep list() call here so we can cache the result...
609 result.unified = list(self.as_unified(result.lines))
612 result.unified = list(self.as_unified(result.lines))
610 result.sideside = result.lines
613 result.sideside = result.lines
611
614
612 return result
615 return result
613
616
614 def parse_lines(self, before_lines, after_lines, source_file, target_file,
617 def parse_lines(self, before_lines, after_lines, source_file, target_file,
615 no_hl=False):
618 no_hl=False):
616 # TODO: dan: investigate doing the diff comparison and fast highlighting
619 # TODO: dan: investigate doing the diff comparison and fast highlighting
617 # on the entire before and after buffered block lines rather than by
620 # on the entire before and after buffered block lines rather than by
618 # line, this means we can get better 'fast' highlighting if the context
621 # line, this means we can get better 'fast' highlighting if the context
619 # allows it - eg.
622 # allows it - eg.
620 # line 4: """
623 # line 4: """
621 # line 5: this gets highlighted as a string
624 # line 5: this gets highlighted as a string
622 # line 6: """
625 # line 6: """
623
626
624 lines = []
627 lines = []
625
628
626 before_newline = AttributeDict()
629 before_newline = AttributeDict()
627 after_newline = AttributeDict()
630 after_newline = AttributeDict()
628 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
631 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
629 before_newline_line = before_lines.pop(-1)
632 before_newline_line = before_lines.pop(-1)
630 before_newline.content = '\n {}'.format(
633 before_newline.content = '\n {}'.format(
631 render_tokenstream(
634 render_tokenstream(
632 [(x[0], '', x[1])
635 [(x[0], '', x[1])
633 for x in [('nonl', before_newline_line['line'])]]))
636 for x in [('nonl', before_newline_line['line'])]]))
634
637
635 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
638 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
636 after_newline_line = after_lines.pop(-1)
639 after_newline_line = after_lines.pop(-1)
637 after_newline.content = '\n {}'.format(
640 after_newline.content = '\n {}'.format(
638 render_tokenstream(
641 render_tokenstream(
639 [(x[0], '', x[1])
642 [(x[0], '', x[1])
640 for x in [('nonl', after_newline_line['line'])]]))
643 for x in [('nonl', after_newline_line['line'])]]))
641
644
642 while before_lines or after_lines:
645 while before_lines or after_lines:
643 before, after = None, None
646 before, after = None, None
644 before_tokens, after_tokens = None, None
647 before_tokens, after_tokens = None, None
645
648
646 if before_lines:
649 if before_lines:
647 before = before_lines.pop(0)
650 before = before_lines.pop(0)
648 if after_lines:
651 if after_lines:
649 after = after_lines.pop(0)
652 after = after_lines.pop(0)
650
653
651 original = AttributeDict()
654 original = AttributeDict()
652 modified = AttributeDict()
655 modified = AttributeDict()
653
656
654 if before:
657 if before:
655 if before['action'] == 'old-no-nl':
658 if before['action'] == 'old-no-nl':
656 before_tokens = [('nonl', before['line'])]
659 before_tokens = [('nonl', before['line'])]
657 else:
660 else:
658 before_tokens = self.get_line_tokens(
661 before_tokens = self.get_line_tokens(
659 line_text=before['line'], line_number=before['old_lineno'],
662 line_text=before['line'], line_number=before['old_lineno'],
660 input_file=source_file, no_hl=no_hl)
663 input_file=source_file, no_hl=no_hl, source='before')
661 original.lineno = before['old_lineno']
664 original.lineno = before['old_lineno']
662 original.content = before['line']
665 original.content = before['line']
663 original.action = self.action_to_op(before['action'])
666 original.action = self.action_to_op(before['action'])
664
667
665 original.get_comment_args = (
668 original.get_comment_args = (
666 source_file, 'o', before['old_lineno'])
669 source_file, 'o', before['old_lineno'])
667
670
668 if after:
671 if after:
669 if after['action'] == 'new-no-nl':
672 if after['action'] == 'new-no-nl':
670 after_tokens = [('nonl', after['line'])]
673 after_tokens = [('nonl', after['line'])]
671 else:
674 else:
672 after_tokens = self.get_line_tokens(
675 after_tokens = self.get_line_tokens(
673 line_text=after['line'], line_number=after['new_lineno'],
676 line_text=after['line'], line_number=after['new_lineno'],
674 input_file=target_file, no_hl=no_hl)
677 input_file=target_file, no_hl=no_hl, source='after')
675 modified.lineno = after['new_lineno']
678 modified.lineno = after['new_lineno']
676 modified.content = after['line']
679 modified.content = after['line']
677 modified.action = self.action_to_op(after['action'])
680 modified.action = self.action_to_op(after['action'])
678
681
679 modified.get_comment_args = (target_file, 'n', after['new_lineno'])
682 modified.get_comment_args = (target_file, 'n', after['new_lineno'])
680
683
681 # diff the lines
684 # diff the lines
682 if before_tokens and after_tokens:
685 if before_tokens and after_tokens:
683 o_tokens, m_tokens, similarity = tokens_diff(
686 o_tokens, m_tokens, similarity = tokens_diff(
684 before_tokens, after_tokens)
687 before_tokens, after_tokens)
685 original.content = render_tokenstream(o_tokens)
688 original.content = render_tokenstream(o_tokens)
686 modified.content = render_tokenstream(m_tokens)
689 modified.content = render_tokenstream(m_tokens)
687 elif before_tokens:
690 elif before_tokens:
688 original.content = render_tokenstream(
691 original.content = render_tokenstream(
689 [(x[0], '', x[1]) for x in before_tokens])
692 [(x[0], '', x[1]) for x in before_tokens])
690 elif after_tokens:
693 elif after_tokens:
691 modified.content = render_tokenstream(
694 modified.content = render_tokenstream(
692 [(x[0], '', x[1]) for x in after_tokens])
695 [(x[0], '', x[1]) for x in after_tokens])
693
696
694 if not before_lines and before_newline:
697 if not before_lines and before_newline:
695 original.content += before_newline.content
698 original.content += before_newline.content
696 before_newline = None
699 before_newline = None
697 if not after_lines and after_newline:
700 if not after_lines and after_newline:
698 modified.content += after_newline.content
701 modified.content += after_newline.content
699 after_newline = None
702 after_newline = None
700
703
701 lines.append(AttributeDict({
704 lines.append(AttributeDict({
702 'original': original,
705 'original': original,
703 'modified': modified,
706 'modified': modified,
704 }))
707 }))
705
708
706 return lines
709 return lines
707
710
708 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
711 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False, source=''):
709 filenode = None
712 filenode = None
710 filename = None
713 filename = None
711
714
712 if isinstance(input_file, compat.string_types):
715 if isinstance(input_file, compat.string_types):
713 filename = input_file
716 filename = input_file
714 elif isinstance(input_file, FileNode):
717 elif isinstance(input_file, FileNode):
715 filenode = input_file
718 filenode = input_file
716 filename = input_file.unicode_path
719 filename = input_file.unicode_path
717
720
718 hl_mode = self.HL_NONE if no_hl else self.highlight_mode
721 hl_mode = self.HL_NONE if no_hl else self.highlight_mode
719 if hl_mode == self.HL_REAL and filenode:
722 if hl_mode == self.HL_REAL and filenode:
720 lexer = self._get_lexer_for_filename(filename)
723 lexer = self._get_lexer_for_filename(filename)
721 file_size_allowed = input_file.size < self.max_file_size_limit
724 file_size_allowed = input_file.size < self.max_file_size_limit
722 if line_number and file_size_allowed:
725 if line_number and file_size_allowed:
723 return self.get_tokenized_filenode_line(
726 return self.get_tokenized_filenode_line(input_file, line_number, lexer, source)
724 input_file, line_number, lexer)
725
727
726 if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
728 if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
727 lexer = self._get_lexer_for_filename(filename)
729 lexer = self._get_lexer_for_filename(filename)
728 return list(tokenize_string(line_text, lexer))
730 return list(tokenize_string(line_text, lexer))
729
731
730 return list(tokenize_string(line_text, plain_text_lexer))
732 return list(tokenize_string(line_text, plain_text_lexer))
731
733
732 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
734 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None, source=''):
733
735
734 if filenode not in self.highlighted_filenodes:
736 def tokenize(_filenode):
735 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
737 self.highlighted_filenodes[source][filenode] = filenode_as_lines_tokens(filenode, lexer)
736 self.highlighted_filenodes[filenode] = tokenized_lines
738
739 if filenode not in self.highlighted_filenodes[source]:
740 tokenize(filenode)
737
741
738 try:
742 try:
739 return self.highlighted_filenodes[filenode][line_number - 1]
743 return self.highlighted_filenodes[source][filenode][line_number - 1]
740 except Exception:
744 except Exception:
741 log.exception('diff rendering error')
745 log.exception('diff rendering error')
742 return [('', u'rhodecode diff rendering error')]
746 return [('', u'L{}: rhodecode diff rendering error'.format(line_number))]
743
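A small sketch of the lookup pattern introduced above; the split per source ('before'/'after') presumably exists because the old and new versions of a modified file could otherwise land behind the same cache key, letting one side be rendered from the wrong version. Names below are illustrative only:

highlighted = {'before': {}, 'after': {}}

def cached_line_tokens(source, filenode, line_number, tokenized_lines):
    # tokenized_lines stands in for filenode_as_lines_tokens(filenode, lexer)
    if filenode not in highlighted[source]:
        highlighted[source][filenode] = tokenized_lines
    return highlighted[source][filenode][line_number - 1]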
747
744 def action_to_op(self, action):
748 def action_to_op(self, action):
745 return {
749 return {
746 'add': '+',
750 'add': '+',
747 'del': '-',
751 'del': '-',
748 'unmod': ' ',
752 'unmod': ' ',
749 'unmod-no-hl': ' ',
753 'unmod-no-hl': ' ',
750 'old-no-nl': ' ',
754 'old-no-nl': ' ',
751 'new-no-nl': ' ',
755 'new-no-nl': ' ',
752 }.get(action, action)
756 }.get(action, action)
753
757
754 def as_unified(self, lines):
758 def as_unified(self, lines):
755 """
759 """
756 Return a generator that yields the lines of a diff in unified order
760 Return a generator that yields the lines of a diff in unified order
757 """
761 """
758 def generator():
762 def generator():
759 buf = []
763 buf = []
760 for line in lines:
764 for line in lines:
761
765
762 if buf and not line.original or line.original.action == ' ':
766 if buf and not line.original or line.original.action == ' ':
763 for b in buf:
767 for b in buf:
764 yield b
768 yield b
765 buf = []
769 buf = []
766
770
767 if line.original:
771 if line.original:
768 if line.original.action == ' ':
772 if line.original.action == ' ':
769 yield (line.original.lineno, line.modified.lineno,
773 yield (line.original.lineno, line.modified.lineno,
770 line.original.action, line.original.content,
774 line.original.action, line.original.content,
771 line.original.get_comment_args)
775 line.original.get_comment_args)
772 continue
776 continue
773
777
774 if line.original.action == '-':
778 if line.original.action == '-':
775 yield (line.original.lineno, None,
779 yield (line.original.lineno, None,
776 line.original.action, line.original.content,
780 line.original.action, line.original.content,
777 line.original.get_comment_args)
781 line.original.get_comment_args)
778
782
779 if line.modified.action == '+':
783 if line.modified.action == '+':
780 buf.append((
784 buf.append((
781 None, line.modified.lineno,
785 None, line.modified.lineno,
782 line.modified.action, line.modified.content,
786 line.modified.action, line.modified.content,
783 line.modified.get_comment_args))
787 line.modified.get_comment_args))
784 continue
788 continue
785
789
786 if line.modified:
790 if line.modified:
787 yield (None, line.modified.lineno,
791 yield (None, line.modified.lineno,
788 line.modified.action, line.modified.content,
792 line.modified.action, line.modified.content,
789 line.modified.get_comment_args)
793 line.modified.get_comment_args)
790
794
791 for b in buf:
795 for b in buf:
792 yield b
796 yield b
793
797
794 return generator()
798 return generator()