diffs: we should use removed instead of deleted.
marcink
r3102:2cd36dd3 default
@@ -1,762 +1,762 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2018 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import logging
import difflib
from itertools import groupby

from pygments import lex
from pygments.formatters.html import _get_ttype_class as pygment_token_class
from pygments.lexers.special import TextLexer, Token
from pygments.lexers import get_lexer_by_name

from rhodecode.lib.helpers import (
    get_lexer_for_filenode, html_escape, get_custom_lexer)
from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
from rhodecode.lib.vcs.nodes import FileNode
from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
from rhodecode.lib.diff_match_patch import diff_match_patch
from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE


plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)


log = logging.getLogger(__name__)


def filenode_as_lines_tokens(filenode, lexer=None):
    org_lexer = lexer
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, org_lexer)
    tokens = tokenize_string(filenode.content, lexer)
    lines = split_token_stream(tokens)
    rv = list(lines)
    return rv


def tokenize_string(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer
    ensuring all original new lines and whitespace is preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False

    if isinstance(lexer, TextLexer):
        lexed = [(Token.Text, content)]
    else:
        lexed = lex(content, lexer)

    for token_type, token_text in lexed:
        yield pygment_token_class(token_type), token_text
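Illustrative only (not part of the module): the generator flattens pygments tokens into (css class, text) pairs, where the css class comes from pygments' _get_ttype_class, imported above as pygment_token_class.

tokens = list(tokenize_string(u'x = 1\n', get_lexer_by_name('python')))
# a flat stream of (css_class, text) pairs, e.g. ('n', u'x'), ('o', u'='), ...
# the exact classes depend on the pygments lexer used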


def split_token_stream(tokens):
    """
    Take a list of (TokenType, text) tuples and split them by a string

    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
    [(TEXT, 'some'), (TEXT, 'text'),
     (TEXT, 'more'), (TEXT, 'text')]
    """

    buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            buffer.append((token_class, part))
            yield buffer
            buffer = []

        buffer.append((token_class, parts[-1]))

    if buffer:
        yield buffer
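Illustrative only: the stream is regrouped into one list of tokens per source line, so a trailing newline still produces a final (possibly empty) line.

lines = list(split_token_stream([('', u'some\ntext'), ('', u'more\n')]))
# -> [[('', u'some')], [('', u'text'), ('', u'more')], [('', u'')]]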


def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotations => lines, if no annotation
    is found, it will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # cache commit_getter lookups

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in enumerate(filenode_as_lines_tokens(filenode), 1))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )
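Illustrative consumer (not part of the module): the generator yields one group per run of consecutive lines that share an annotation. `filenode` is an assumed FileNode, and each annotation is whatever its commit_getter returns (or None).

for annotation, annotated_lines in filenode_as_annotated_lines_tokens(filenode):
    for line_no, tokens in annotated_lines:
        html_line = render_tokenstream(tokens)  # annotation is None for unannotated lines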


def render_tokenstream(tokenstream):
    result = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

        if token_class:
            result.append(u'<span class="%s">' % token_class)
        else:
            result.append(u'<span>')

        for op_tag, token_text in token_ops_texts:

            if op_tag:
                result.append(u'<%s>' % op_tag)

            escaped_text = html_escape(token_text)

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

            result.append(escaped_text)

            if op_tag:
                result.append(u'</%s>' % op_tag)

        result.append(u'</span>')

    html = ''.join(result)
    return html
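Illustrative only: each rolled-up class becomes one span, and non-empty ops become nested tags.

render_tokenstream([('k', '', u'def'), ('', 'ins', u' foo')])
# -> u'<span class="k">def</span><span><ins> foo</ins></span>'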


def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html eg for a token stream ie.

    <span class="A"><ins>he</ins>llo</span>
    vs
    <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])

    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
     ('classB', [('', ' ')],
     ('classA', [('', 'there')]]

    """
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            text_buffer = []
            for t_class, t_op, t_text in token_text_list:
                text_buffer.append(t_text)
            ops.append((token_op, ''.join(text_buffer)))
        result.append((token_class, ops))
    return result
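Illustrative only: two-tuples get an empty op, and consecutive entries with the same class and op are merged, which keeps the emitted HTML tags minimal.

rollup_tokenstream([('nt', u'he'), ('nt', u'llo'), ('', u' world')])
# -> [('nt', [('', u'hello')]), ('', [('', u' world')])]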


def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    """

    old_tokens_result = []
    new_tokens_result = []

    similarity = difflib.SequenceMatcher(None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            old_char_tokens = []
            new_char_tokens = []
            for token_class, token_text in old_tokens[o1:o2]:
                for char in token_text:
                    old_char_tokens.append((token_class, char))

            for token_class, token_text in new_tokens[n1:n2]:
                for char in token_text:
                    new_char_tokens.append((token_class, char))

            old_string = ''.join([token_text for
                                  token_class, token_text in old_char_tokens])
            new_string = ''.join([token_text for
                                  token_class, token_text in new_char_tokens])

            char_sequence = difflib.SequenceMatcher(
                None, old_string, new_string)
            copcodes = char_sequence.get_opcodes()
            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                a, b = 0, 0
                for op, rep in reps:
                    l = len(rep)
                    if op == 0:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += l
                        b += l
                    elif op == -1:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += l
                    elif op == 1:
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += l
            else:
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
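Illustrative only (not part of the module), using the plain difflib path so the result is deterministic:

old, new, similarity = tokens_diff(
    [('', u'hello')], [('', u'hello world')], use_diff_match_patch=False)
# similarity == 0.625 (>= 0.6, so a character-level diff is computed)
# old -> five per-character tuples with an empty op: ('', '', u'h') ... ('', '', u'o')
# new -> the same five unchanged characters, then ('', 'ins', u' '), ('', 'ins', u'w'), ...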


def diffset_node_getter(commit):
    def get_node(fname):
        try:
            return commit.get_node(fname)
        except NodeDoesNotExistError:
            return None

    return get_node
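Illustrative only: the returned closure swallows NodeDoesNotExistError, so callers can probe paths without try/except. `commit` is an assumed VCS commit object and the path below is an arbitrary example.

get_node = diffset_node_getter(commit)
node = get_node('docs/index.rst')  # a FileNode, or None if the path is absent in `commit`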


class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest

    def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                 source_repo_name=None,
                 source_node_getter=lambda filename: None,
                 target_node_getter=lambda filename: None,
                 source_nodes=None, target_nodes=None,
                 # files over this size will use fast highlighting
                 max_file_size_limit=150 * 1024,
                 ):

        self.highlight_mode = highlight_mode
        self.highlighted_filenodes = {}
        self.source_node_getter = source_node_getter
        self.target_node_getter = target_node_getter
        self.source_nodes = source_nodes or {}
        self.target_nodes = target_nodes or {}
        self.repo_name = repo_name
        self.source_repo_name = source_repo_name or repo_name
        self.max_file_size_limit = max_file_size_limit

    def render_patchset(self, patchset, source_ref=None, target_ref=None):
        diffset = AttributeDict(dict(
            lines_added=0,
            lines_deleted=0,
            changed_files=0,
            files=[],
            file_stats={},
            limited_diff=isinstance(patchset, LimitedDiffContainer),
            repo_name=self.repo_name,
            source_repo_name=self.source_repo_name,
            source_ref=source_ref,
            target_ref=target_ref,
        ))
        for patch in patchset:
            diffset.file_stats[patch['filename']] = patch['stats']
            filediff = self.render_patch(patch)
            filediff.diffset = StrictAttributeDict(dict(
                source_ref=diffset.source_ref,
                target_ref=diffset.target_ref,
                repo_name=diffset.repo_name,
                source_repo_name=diffset.source_repo_name,
            ))
            diffset.files.append(filediff)
            diffset.changed_files += 1
            if not patch['stats']['binary']:
                diffset.lines_added += patch['stats']['added']
                diffset.lines_deleted += patch['stats']['deleted']

        return diffset

    _lexer_cache = {}

    def _get_lexer_for_filename(self, filename, filenode=None):
        # cached because we might need to call it twice for source/target
        if filename not in self._lexer_cache:
            if filenode:
                lexer = filenode.lexer
                extension = filenode.extension
            else:
                lexer = FileNode.get_lexer(filename=filename)
                extension = filename.split('.')[-1]

            lexer = get_custom_lexer(extension) or lexer
            self._lexer_cache[filename] = lexer
        return self._lexer_cache[filename]

    def render_patch(self, patch):
        log.debug('rendering diff for %r', patch['filename'])

        source_filename = patch['original_filename']
        target_filename = patch['filename']

        source_lexer = plain_text_lexer
        target_lexer = plain_text_lexer

        if not patch['stats']['binary']:
            node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
            hl_mode = node_hl_mode or self.highlight_mode

            if hl_mode == self.HL_REAL:
                if (source_filename and patch['operation'] in ('D', 'M')
                        and source_filename not in self.source_nodes):
                    self.source_nodes[source_filename] = (
                        self.source_node_getter(source_filename))

                if (target_filename and patch['operation'] in ('A', 'M')
                        and target_filename not in self.target_nodes):
                    self.target_nodes[target_filename] = (
                        self.target_node_getter(target_filename))

            elif hl_mode == self.HL_FAST:
                source_lexer = self._get_lexer_for_filename(source_filename)
                target_lexer = self._get_lexer_for_filename(target_filename)

        source_file = self.source_nodes.get(source_filename, source_filename)
        target_file = self.target_nodes.get(target_filename, target_filename)

        source_filenode, target_filenode = None, None

        # TODO: dan: FileNode.lexer works on the content of the file - which
        # can be slow - issue #4289 explains a lexer clean up - which once
        # done can allow caching a lexer for a filenode to avoid the file lookup
        if isinstance(source_file, FileNode):
            source_filenode = source_file
            #source_lexer = source_file.lexer
            source_lexer = self._get_lexer_for_filename(source_filename)
            source_file.lexer = source_lexer

        if isinstance(target_file, FileNode):
            target_filenode = target_file
            #target_lexer = target_file.lexer
            target_lexer = self._get_lexer_for_filename(target_filename)
            target_file.lexer = target_lexer

        source_file_path, target_file_path = None, None

        if source_filename != '/dev/null':
            source_file_path = source_filename
        if target_filename != '/dev/null':
            target_file_path = target_filename

        source_file_type = source_lexer.name
        target_file_type = target_lexer.name

        filediff = AttributeDict({
            'source_file_path': source_file_path,
            'target_file_path': target_file_path,
            'source_filenode': source_filenode,
            'target_filenode': target_filenode,
            'source_file_type': target_file_type,
            'target_file_type': source_file_type,
            'patch': {'filename': patch['filename'], 'stats': patch['stats']},
            'operation': patch['operation'],
            'source_mode': patch['stats']['old_mode'],
            'target_mode': patch['stats']['new_mode'],
            'limited_diff': isinstance(patch, LimitedDiffContainer),
            'hunks': [],
            'hunk_ops': None,
            'diffset': self,
        })
        file_chunks = patch['chunks'][1:]
        for hunk in file_chunks:
            hunkbit = self.parse_hunk(hunk, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunks.append(hunkbit)

        # Simulate hunk on OPS type line which doesn't really contain any diff
        # this allows commenting on those
        if not file_chunks:
            actions = []
            for op_id, op_text in filediff.patch['stats']['ops'].items():
                if op_id == DEL_FILENODE:
-                    actions.append(u'file was deleted')
+                    actions.append(u'file was removed')
                elif op_id == BIN_FILENODE:
                    actions.append(u'binary diff hidden')
                else:
                    actions.append(safe_unicode(op_text))
            action_line = u'NO CONTENT: ' + \
                          u', '.join(actions) or u'UNDEFINED_ACTION'

            hunk_ops = {'source_length': 0, 'source_start': 0,
                        'lines': [
                            {'new_lineno': 0, 'old_lineno': 1,
                             'action': 'unmod-no-hl', 'line': action_line}
                        ],
                        'section_header': u'', 'target_start': 1, 'target_length': 1}

            hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunk_ops = hunkbit
        return filediff

    def parse_hunk(self, hunk, source_file, target_file):
        result = AttributeDict(dict(
            source_start=hunk['source_start'],
            source_length=hunk['source_length'],
            target_start=hunk['target_start'],
            target_length=hunk['target_length'],
            section_header=hunk['section_header'],
            lines=[],
        ))
        before, after = [], []

        for line in hunk['lines']:
            if line['action'] in ['unmod', 'unmod-no-hl']:
                no_hl = line['action'] == 'unmod-no-hl'
                result.lines.extend(
                    self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
                after.append(line)
                before.append(line)
            elif line['action'] == 'add':
                after.append(line)
            elif line['action'] == 'del':
                before.append(line)
            elif line['action'] == 'old-no-nl':
                before.append(line)
            elif line['action'] == 'new-no-nl':
                after.append(line)

        all_actions = [x['action'] for x in after] + [x['action'] for x in before]
        no_hl = {x for x in all_actions} == {'unmod-no-hl'}
        result.lines.extend(
            self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
        # NOTE(marcink): we must keep list() call here so we can cache the result...
        result.unified = list(self.as_unified(result.lines))
        result.sideside = result.lines

        return result

    def parse_lines(self, before_lines, after_lines, source_file, target_file,
                    no_hl=False):
        # TODO: dan: investigate doing the diff comparison and fast highlighting
        # on the entire before and after buffered block lines rather than by
        # line, this means we can get better 'fast' highlighting if the context
        # allows it - eg.
        # line 4: """
        # line 5: this gets highlighted as a string
        # line 6: """

        lines = []

        before_newline = AttributeDict()
        after_newline = AttributeDict()
        if before_lines and before_lines[-1]['action'] == 'old-no-nl':
            before_newline_line = before_lines.pop(-1)
            before_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', before_newline_line['line'])]]))

        if after_lines and after_lines[-1]['action'] == 'new-no-nl':
            after_newline_line = after_lines.pop(-1)
            after_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', after_newline_line['line'])]]))

        while before_lines or after_lines:
            before, after = None, None
            before_tokens, after_tokens = None, None

            if before_lines:
                before = before_lines.pop(0)
            if after_lines:
                after = after_lines.pop(0)

            original = AttributeDict()
            modified = AttributeDict()

            if before:
                if before['action'] == 'old-no-nl':
                    before_tokens = [('nonl', before['line'])]
                else:
                    before_tokens = self.get_line_tokens(
                        line_text=before['line'], line_number=before['old_lineno'],
                        input_file=source_file, no_hl=no_hl)
                original.lineno = before['old_lineno']
                original.content = before['line']
                original.action = self.action_to_op(before['action'])

                original.get_comment_args = (
                    source_file, 'o', before['old_lineno'])

            if after:
                if after['action'] == 'new-no-nl':
                    after_tokens = [('nonl', after['line'])]
                else:
                    after_tokens = self.get_line_tokens(
                        line_text=after['line'], line_number=after['new_lineno'],
                        input_file=target_file, no_hl=no_hl)
                modified.lineno = after['new_lineno']
                modified.content = after['line']
                modified.action = self.action_to_op(after['action'])

                modified.get_comment_args = (target_file, 'n', after['new_lineno'])

            # diff the lines
            if before_tokens and after_tokens:
                o_tokens, m_tokens, similarity = tokens_diff(
                    before_tokens, after_tokens)
                original.content = render_tokenstream(o_tokens)
                modified.content = render_tokenstream(m_tokens)
            elif before_tokens:
                original.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in before_tokens])
            elif after_tokens:
                modified.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in after_tokens])

            if not before_lines and before_newline:
                original.content += before_newline.content
                before_newline = None
            if not after_lines and after_newline:
                modified.content += after_newline.content
                after_newline = None

            lines.append(AttributeDict({
                'original': original,
                'modified': modified,
            }))

        return lines

    def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
        filenode = None
        filename = None

        if isinstance(input_file, basestring):
            filename = input_file
        elif isinstance(input_file, FileNode):
            filenode = input_file
            filename = input_file.unicode_path

        hl_mode = self.HL_NONE if no_hl else self.highlight_mode
        if hl_mode == self.HL_REAL and filenode:
            lexer = self._get_lexer_for_filename(filename)
            file_size_allowed = input_file.size < self.max_file_size_limit
            if line_number and file_size_allowed:
                return self.get_tokenized_filenode_line(
                    input_file, line_number, lexer)

        if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
            lexer = self._get_lexer_for_filename(filename)
            return list(tokenize_string(line_text, lexer))

        return list(tokenize_string(line_text, plain_text_lexer))

    def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):

        if filenode not in self.highlighted_filenodes:
            tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
            self.highlighted_filenodes[filenode] = tokenized_lines
        return self.highlighted_filenodes[filenode][line_number - 1]

    def action_to_op(self, action):
        return {
            'add': '+',
            'del': '-',
            'unmod': ' ',
            'unmod-no-hl': ' ',
            'old-no-nl': ' ',
            'new-no-nl': ' ',
        }.get(action, action)

    def as_unified(self, lines):
        """
        Return a generator that yields the lines of a diff in unified order
        """
        def generator():
            buf = []
            for line in lines:

                if buf and not line.original or line.original.action == ' ':
                    for b in buf:
                        yield b
                    buf = []

                if line.original:
                    if line.original.action == ' ':
                        yield (line.original.lineno, line.modified.lineno,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)
                        continue

                    if line.original.action == '-':
                        yield (line.original.lineno, None,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)

                    if line.modified.action == '+':
                        buf.append((
                            None, line.modified.lineno,
                            line.modified.action, line.modified.content,
                            line.modified.get_comment_args))
                        continue

                if line.modified:
                    yield (None, line.modified.lineno,
                           line.modified.action, line.modified.content,
                           line.modified.get_comment_args)

            for b in buf:
                yield b

        return generator()
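A minimal usage sketch (illustrative only, not part of the module). Per the class docstring the patchset comes from diffs.DiffProcessor; the commits, refs and repository name below are placeholder assumptions.

diffset = DiffSet(
    highlight_mode=DiffSet.HL_FAST,  # per-line highlighting; HL_REAL is exact but slower
    repo_name='example-repo',
    source_node_getter=diffset_node_getter(source_commit),
    target_node_getter=diffset_node_getter(target_commit),
)
rendered = diffset.render_patchset(
    patchset, source_ref='source-commit-id', target_ref='target-commit-id')
# rendered.files        -> one AttributeDict per file, each with .hunks and .hunk_ops
# rendered.lines_added  -> aggregate added-line count (rendered.lines_deleted likewise)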