diffs: handle very odd case of binary, corrupted diffs which crashed the diff parser.
dan - r3831:0f09c1a7 stable

@@ -1,786 +1,792 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2019 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import logging
import difflib
from itertools import groupby

from pygments import lex
from pygments.formatters.html import _get_ttype_class as pygment_token_class
from pygments.lexers.special import TextLexer, Token
from pygments.lexers import get_lexer_by_name
from pyramid import compat

from rhodecode.lib.helpers import (
    get_lexer_for_filenode, html_escape, get_custom_lexer)
from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
from rhodecode.lib.vcs.nodes import FileNode
from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
from rhodecode.lib.diff_match_patch import diff_match_patch
from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE


plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)


log = logging.getLogger(__name__)


def filenode_as_lines_tokens(filenode, lexer=None):
    org_lexer = lexer
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, org_lexer)
    content = filenode.content
    tokens = tokenize_string(content, lexer)
    lines = split_token_stream(tokens, content)
    rv = list(lines)
    return rv


def tokenize_string(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer
    ensuring all original new lines and whitespace is preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False

    if isinstance(lexer, TextLexer):
        lexed = [(Token.Text, content)]
    else:
        lexed = lex(content, lexer)

    for token_type, token_text in lexed:
        yield pygment_token_class(token_type), token_text


def split_token_stream(tokens, content):
    """
    Take a list of (TokenType, text) tuples and split them by a string

    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
        [(TEXT, 'some'), (TEXT, 'text'),
         (TEXT, 'more'), (TEXT, 'text')]
    """

    token_buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            token_buffer.append((token_class, part))
            yield token_buffer
            token_buffer = []

        token_buffer.append((token_class, parts[-1]))

    if token_buffer:
        yield token_buffer
    elif content:
        # this is a special case, we have the content, but tokenization didn't produce
        # any results. THis can happen if know file extensions like .css have some bogus
        # unicode content without any newline characters
        yield [(pygment_token_class(Token.Text), content)]


def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotations => lines, if no annotation
    is found, it will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # cache commit_getter lookups

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in enumerate(filenode_as_lines_tokens(filenode), 1))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )


def render_tokenstream(tokenstream):
    result = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

        if token_class:
            result.append(u'<span class="%s">' % token_class)
        else:
            result.append(u'<span>')

        for op_tag, token_text in token_ops_texts:

            if op_tag:
                result.append(u'<%s>' % op_tag)

-            escaped_text = html_escape(token_text)
+            # NOTE(marcink): in some cases of mixed encodings, we might run into
+            # troubles in the html_escape, in this case we say unicode force on token_text
+            # that would ensure "correct" data even with the cost of rendered
+            try:
+                escaped_text = html_escape(token_text)
+            except TypeError:
+                escaped_text = html_escape(safe_unicode(token_text))

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

            result.append(escaped_text)

            if op_tag:
                result.append(u'</%s>' % op_tag)

        result.append(u'</span>')

    html = ''.join(result)
    return html


def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html eg for a token stream ie.

    <span class="A"><ins>he</ins>llo</span>
    vs
    <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])

    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
     ('classB', [('', ' ')],
     ('classA', [('', 'there')]]

    """
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            text_buffer = []
            for t_class, t_op, t_text in token_text_list:
                text_buffer.append(t_text)
            ops.append((token_op, ''.join(text_buffer)))
        result.append((token_class, ops))
    return result


def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    """

    old_tokens_result = []
    new_tokens_result = []

    similarity = difflib.SequenceMatcher(None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            old_char_tokens = []
            new_char_tokens = []
            for token_class, token_text in old_tokens[o1:o2]:
                for char in token_text:
                    old_char_tokens.append((token_class, char))

            for token_class, token_text in new_tokens[n1:n2]:
                for char in token_text:
                    new_char_tokens.append((token_class, char))

            old_string = ''.join([token_text for
                                  token_class, token_text in old_char_tokens])
            new_string = ''.join([token_text for
                                  token_class, token_text in new_char_tokens])

            char_sequence = difflib.SequenceMatcher(
                None, old_string, new_string)
            copcodes = char_sequence.get_opcodes()
            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                a, b = 0, 0
                for op, rep in reps:
                    l = len(rep)
                    if op == 0:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += l
                        b += l
                    elif op == -1:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += l
                    elif op == 1:
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += l
            else:
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity


def diffset_node_getter(commit):
    def get_node(fname):
        try:
            return commit.get_node(fname)
        except NodeDoesNotExistError:
            return None

    return get_node


class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest

    def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                 source_repo_name=None,
                 source_node_getter=lambda filename: None,
                 target_repo_name=None,
                 target_node_getter=lambda filename: None,
                 source_nodes=None, target_nodes=None,
                 # files over this size will use fast highlighting
                 max_file_size_limit=150 * 1024,
                 ):

        self.highlight_mode = highlight_mode
        self.highlighted_filenodes = {}
        self.source_node_getter = source_node_getter
        self.target_node_getter = target_node_getter
        self.source_nodes = source_nodes or {}
        self.target_nodes = target_nodes or {}
        self.repo_name = repo_name
        self.target_repo_name = target_repo_name or repo_name
        self.source_repo_name = source_repo_name or repo_name
        self.max_file_size_limit = max_file_size_limit

    def render_patchset(self, patchset, source_ref=None, target_ref=None):
        diffset = AttributeDict(dict(
            lines_added=0,
            lines_deleted=0,
            changed_files=0,
            files=[],
            file_stats={},
            limited_diff=isinstance(patchset, LimitedDiffContainer),
            repo_name=self.repo_name,
            target_repo_name=self.target_repo_name,
            source_repo_name=self.source_repo_name,
            source_ref=source_ref,
            target_ref=target_ref,
        ))
        for patch in patchset:
            diffset.file_stats[patch['filename']] = patch['stats']
            filediff = self.render_patch(patch)
            filediff.diffset = StrictAttributeDict(dict(
                source_ref=diffset.source_ref,
                target_ref=diffset.target_ref,
                repo_name=diffset.repo_name,
                source_repo_name=diffset.source_repo_name,
                target_repo_name=diffset.target_repo_name,
            ))
            diffset.files.append(filediff)
            diffset.changed_files += 1
            if not patch['stats']['binary']:
                diffset.lines_added += patch['stats']['added']
                diffset.lines_deleted += patch['stats']['deleted']

        return diffset

    _lexer_cache = {}

    def _get_lexer_for_filename(self, filename, filenode=None):
        # cached because we might need to call it twice for source/target
        if filename not in self._lexer_cache:
            if filenode:
                lexer = filenode.lexer
                extension = filenode.extension
            else:
                lexer = FileNode.get_lexer(filename=filename)
                extension = filename.split('.')[-1]

            lexer = get_custom_lexer(extension) or lexer
            self._lexer_cache[filename] = lexer
        return self._lexer_cache[filename]

    def render_patch(self, patch):
        log.debug('rendering diff for %r', patch['filename'])

        source_filename = patch['original_filename']
        target_filename = patch['filename']

        source_lexer = plain_text_lexer
        target_lexer = plain_text_lexer

        if not patch['stats']['binary']:
            node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
            hl_mode = node_hl_mode or self.highlight_mode

            if hl_mode == self.HL_REAL:
                if (source_filename and patch['operation'] in ('D', 'M')
                        and source_filename not in self.source_nodes):
                    self.source_nodes[source_filename] = (
                        self.source_node_getter(source_filename))

                if (target_filename and patch['operation'] in ('A', 'M')
                        and target_filename not in self.target_nodes):
                    self.target_nodes[target_filename] = (
                        self.target_node_getter(target_filename))

            elif hl_mode == self.HL_FAST:
                source_lexer = self._get_lexer_for_filename(source_filename)
                target_lexer = self._get_lexer_for_filename(target_filename)

        source_file = self.source_nodes.get(source_filename, source_filename)
        target_file = self.target_nodes.get(target_filename, target_filename)
        raw_id_uid = ''
        if self.source_nodes.get(source_filename):
            raw_id_uid = self.source_nodes[source_filename].commit.raw_id

        if not raw_id_uid and self.target_nodes.get(target_filename):
            # in case this is a new file we only have it in target
            raw_id_uid = self.target_nodes[target_filename].commit.raw_id

        source_filenode, target_filenode = None, None

        # TODO: dan: FileNode.lexer works on the content of the file - which
        # can be slow - issue #4289 explains a lexer clean up - which once
        # done can allow caching a lexer for a filenode to avoid the file lookup
        if isinstance(source_file, FileNode):
            source_filenode = source_file
            #source_lexer = source_file.lexer
            source_lexer = self._get_lexer_for_filename(source_filename)
            source_file.lexer = source_lexer

        if isinstance(target_file, FileNode):
            target_filenode = target_file
            #target_lexer = target_file.lexer
            target_lexer = self._get_lexer_for_filename(target_filename)
            target_file.lexer = target_lexer

        source_file_path, target_file_path = None, None

        if source_filename != '/dev/null':
            source_file_path = source_filename
        if target_filename != '/dev/null':
            target_file_path = target_filename

        source_file_type = source_lexer.name
        target_file_type = target_lexer.name

        filediff = AttributeDict({
            'source_file_path': source_file_path,
            'target_file_path': target_file_path,
            'source_filenode': source_filenode,
            'target_filenode': target_filenode,
            'source_file_type': target_file_type,
            'target_file_type': source_file_type,
            'patch': {'filename': patch['filename'], 'stats': patch['stats']},
            'operation': patch['operation'],
            'source_mode': patch['stats']['old_mode'],
            'target_mode': patch['stats']['new_mode'],
            'limited_diff': patch['is_limited_diff'],
            'hunks': [],
            'hunk_ops': None,
            'diffset': self,
            'raw_id': raw_id_uid,
        })

        file_chunks = patch['chunks'][1:]
        for hunk in file_chunks:
            hunkbit = self.parse_hunk(hunk, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunks.append(hunkbit)

        # Simulate hunk on OPS type line which doesn't really contain any diff
        # this allows commenting on those
        if not file_chunks:
            actions = []
            for op_id, op_text in filediff.patch['stats']['ops'].items():
                if op_id == DEL_FILENODE:
                    actions.append(u'file was removed')
                elif op_id == BIN_FILENODE:
                    actions.append(u'binary diff hidden')
                else:
                    actions.append(safe_unicode(op_text))
            action_line = u'NO CONTENT: ' + \
                          u', '.join(actions) or u'UNDEFINED_ACTION'

            hunk_ops = {'source_length': 0, 'source_start': 0,
                        'lines': [
                            {'new_lineno': 0, 'old_lineno': 1,
                             'action': 'unmod-no-hl', 'line': action_line}
                        ],
                        'section_header': u'', 'target_start': 1, 'target_length': 1}

            hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunk_ops = hunkbit
        return filediff

    def parse_hunk(self, hunk, source_file, target_file):
        result = AttributeDict(dict(
            source_start=hunk['source_start'],
            source_length=hunk['source_length'],
            target_start=hunk['target_start'],
            target_length=hunk['target_length'],
            section_header=hunk['section_header'],
            lines=[],
        ))
        before, after = [], []

        for line in hunk['lines']:
            if line['action'] in ['unmod', 'unmod-no-hl']:
                no_hl = line['action'] == 'unmod-no-hl'
                result.lines.extend(
                    self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
                after.append(line)
                before.append(line)
            elif line['action'] == 'add':
                after.append(line)
            elif line['action'] == 'del':
                before.append(line)
            elif line['action'] == 'old-no-nl':
                before.append(line)
            elif line['action'] == 'new-no-nl':
                after.append(line)

        all_actions = [x['action'] for x in after] + [x['action'] for x in before]
        no_hl = {x for x in all_actions} == {'unmod-no-hl'}
        result.lines.extend(
            self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
        # NOTE(marcink): we must keep list() call here so we can cache the result...
        result.unified = list(self.as_unified(result.lines))
        result.sideside = result.lines

        return result

    def parse_lines(self, before_lines, after_lines, source_file, target_file,
                    no_hl=False):
        # TODO: dan: investigate doing the diff comparison and fast highlighting
        # on the entire before and after buffered block lines rather than by
        # line, this means we can get better 'fast' highlighting if the context
        # allows it - eg.
        # line 4: """
        # line 5: this gets highlighted as a string
        # line 6: """

        lines = []

        before_newline = AttributeDict()
        after_newline = AttributeDict()
        if before_lines and before_lines[-1]['action'] == 'old-no-nl':
            before_newline_line = before_lines.pop(-1)
            before_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', before_newline_line['line'])]]))

        if after_lines and after_lines[-1]['action'] == 'new-no-nl':
            after_newline_line = after_lines.pop(-1)
            after_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', after_newline_line['line'])]]))

        while before_lines or after_lines:
            before, after = None, None
            before_tokens, after_tokens = None, None

            if before_lines:
                before = before_lines.pop(0)
            if after_lines:
                after = after_lines.pop(0)

            original = AttributeDict()
            modified = AttributeDict()

            if before:
                if before['action'] == 'old-no-nl':
                    before_tokens = [('nonl', before['line'])]
                else:
                    before_tokens = self.get_line_tokens(
                        line_text=before['line'], line_number=before['old_lineno'],
                        input_file=source_file, no_hl=no_hl)
                original.lineno = before['old_lineno']
                original.content = before['line']
                original.action = self.action_to_op(before['action'])

                original.get_comment_args = (
                    source_file, 'o', before['old_lineno'])

            if after:
                if after['action'] == 'new-no-nl':
                    after_tokens = [('nonl', after['line'])]
                else:
                    after_tokens = self.get_line_tokens(
                        line_text=after['line'], line_number=after['new_lineno'],
                        input_file=target_file, no_hl=no_hl)
                modified.lineno = after['new_lineno']
                modified.content = after['line']
                modified.action = self.action_to_op(after['action'])

                modified.get_comment_args = (target_file, 'n', after['new_lineno'])

            # diff the lines
            if before_tokens and after_tokens:
                o_tokens, m_tokens, similarity = tokens_diff(
                    before_tokens, after_tokens)
                original.content = render_tokenstream(o_tokens)
                modified.content = render_tokenstream(m_tokens)
            elif before_tokens:
                original.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in before_tokens])
            elif after_tokens:
                modified.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in after_tokens])

            if not before_lines and before_newline:
                original.content += before_newline.content
                before_newline = None
            if not after_lines and after_newline:
                modified.content += after_newline.content
                after_newline = None

            lines.append(AttributeDict({
                'original': original,
                'modified': modified,
            }))

        return lines

    def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
        filenode = None
        filename = None

        if isinstance(input_file, compat.string_types):
            filename = input_file
        elif isinstance(input_file, FileNode):
            filenode = input_file
            filename = input_file.unicode_path

        hl_mode = self.HL_NONE if no_hl else self.highlight_mode
        if hl_mode == self.HL_REAL and filenode:
            lexer = self._get_lexer_for_filename(filename)
            file_size_allowed = input_file.size < self.max_file_size_limit
            if line_number and file_size_allowed:
                return self.get_tokenized_filenode_line(
                    input_file, line_number, lexer)

        if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
            lexer = self._get_lexer_for_filename(filename)
            return list(tokenize_string(line_text, lexer))

        return list(tokenize_string(line_text, plain_text_lexer))

    def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):

        if filenode not in self.highlighted_filenodes:
            tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
            self.highlighted_filenodes[filenode] = tokenized_lines

        try:
            return self.highlighted_filenodes[filenode][line_number - 1]
        except Exception:
            return [('', u'rhodecode diff rendering error')]

    def action_to_op(self, action):
        return {
            'add': '+',
            'del': '-',
            'unmod': ' ',
            'unmod-no-hl': ' ',
            'old-no-nl': ' ',
            'new-no-nl': ' ',
        }.get(action, action)

    def as_unified(self, lines):
        """
        Return a generator that yields the lines of a diff in unified order
        """
        def generator():
            buf = []
            for line in lines:

                if buf and not line.original or line.original.action == ' ':
                    for b in buf:
                        yield b
                    buf = []

                if line.original:
                    if line.original.action == ' ':
                        yield (line.original.lineno, line.modified.lineno,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)
                        continue

                    if line.original.action == '-':
                        yield (line.original.lineno, None,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)

                    if line.modified.action == '+':
                        buf.append((
                            None, line.modified.lineno,
                            line.modified.action, line.modified.content,
                            line.modified.get_comment_args))
                        continue

                if line.modified:
                    yield (None, line.modified.lineno,
                           line.modified.action, line.modified.content,
                           line.modified.get_comment_args)

            for b in buf:
                yield b

        return generator()
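
The functional change in this commit is the try/except TypeError guard around html_escape() in render_tokenstream(), so that token text arriving with an unexpected or mixed encoding is forced to unicode via safe_unicode() instead of aborting the diff rendering. Below is a minimal, self-contained sketch of that fallback pattern; escape_token_text() and force_text() are hypothetical stand-ins for the logic built on rhodecode.lib.helpers.html_escape and rhodecode.lib.utils2.safe_unicode, not RhodeCode APIs, and the example only illustrates the idea.

# -*- coding: utf-8 -*-
from html import escape as html_escape


def force_text(value, fallback_encoding='utf-8'):
    # Hypothetical stand-in for safe_unicode: decode byte strings with
    # errors='replace' so corrupted or binary content still renders as text.
    if isinstance(value, bytes):
        return value.decode(fallback_encoding, errors='replace')
    return value


def escape_token_text(token_text):
    # Same shape as the patched render_tokenstream() logic: try the normal
    # escape first, and only pay the cost of forcing text when the input
    # type breaks it.
    try:
        return html_escape(token_text)
    except TypeError:
        return html_escape(force_text(token_text))


if __name__ == '__main__':
    print(escape_token_text(u'<ins>caf\xe9</ins>'))      # normal text path
    print(escape_token_text(b'<del>caf\xc3\xa9</del>'))  # byte input, as from a corrupted diff

Catching only TypeError keeps other escaping failures visible, while odd byte input degrades to a force-decoded rendering, which fits the commit's goal of not crashing on binary or corrupted diff content.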
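
For readers skimming the surrounding module: tokens_diff() above annotates two per-line token lists with 'ins'/'del' operations by running difflib.SequenceMatcher over the token texts, refining 'replace' blocks character by character (optionally smoothed with diff_match_patch). The sketch below reproduces only the token-level part of that idea; mark_token_ops() is an illustrative name, not part of RhodeCode, and it skips the character-level refinement and the similarity cutoff.

import difflib


def mark_token_ops(old_tokens, new_tokens):
    """Annotate (token_class, text) pairs with 'ins'/'del'/'' ops.

    A stripped-down illustration of the technique used by tokens_diff:
    compare the token texts with SequenceMatcher and tag each token
    according to the opcode covering it.
    """
    matcher = difflib.SequenceMatcher(
        None, [t[1] for t in old_tokens], [t[1] for t in new_tokens])

    old_result, new_result = [], []
    for tag, o1, o2, n1, n2 in matcher.get_opcodes():
        if tag in ('equal', 'replace'):
            op_old = '' if tag == 'equal' else 'del'
            op_new = '' if tag == 'equal' else 'ins'
            old_result.extend((cls, op_old, text) for cls, text in old_tokens[o1:o2])
            new_result.extend((cls, op_new, text) for cls, text in new_tokens[n1:n2])
        elif tag == 'delete':
            old_result.extend((cls, 'del', text) for cls, text in old_tokens[o1:o2])
        elif tag == 'insert':
            new_result.extend((cls, 'ins', text) for cls, text in new_tokens[n1:n2])
    return old_result, new_result


if __name__ == '__main__':
    old = [('k', 'def'), ('', ' '), ('n', 'foo'), ('p', '(x)')]
    new = [('k', 'def'), ('', ' '), ('n', 'bar'), ('p', '(x)')]
    # 'foo' comes back tagged 'del' on the old side, 'bar' tagged 'ins' on the new side.
    print(mark_token_ops(old, new))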