comments: save comments that are not rendered to be displayed as outdated...
marcink
r1258:70c673b5 default
@@ -1,668 +1,687 @@
 # -*- coding: utf-8 -*-

 # Copyright (C) 2011-2016 RhodeCode GmbH
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License, version 3
 # (only), as published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 # This program is dual-licensed. If you wish to learn more about the
 # RhodeCode Enterprise Edition, including its added features, Support services,
 # and proprietary license terms, please see https://rhodecode.com/licenses/

 import logging
 import difflib
 from itertools import groupby

 from pygments import lex
 from pygments.formatters.html import _get_ttype_class as pygment_token_class
 from rhodecode.lib.helpers import (
     get_lexer_for_filenode, get_lexer_safe, html_escape)
 from rhodecode.lib.utils2 import AttributeDict
 from rhodecode.lib.vcs.nodes import FileNode
 from rhodecode.lib.diff_match_patch import diff_match_patch
 from rhodecode.lib.diffs import LimitedDiffContainer
 from pygments.lexers import get_lexer_by_name

 plain_text_lexer = get_lexer_by_name(
     'text', stripall=False, stripnl=False, ensurenl=False)


 log = logging.getLogger()


 def filenode_as_lines_tokens(filenode, lexer=None):
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
     tokens = tokenize_string(filenode.content, lexer)
     lines = split_token_stream(tokens, split_string='\n')
     rv = list(lines)
     return rv


 def tokenize_string(content, lexer):
     """
     Use pygments to tokenize some content based on a lexer
     ensuring all original new lines and whitespace is preserved
     """

     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
     for token_type, token_text in lex(content, lexer):
         yield pygment_token_class(token_type), token_text


 def split_token_stream(tokens, split_string=u'\n'):
     """
     Take a list of (TokenType, text) tuples and split them by a string

     >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
     [(TEXT, 'some'), (TEXT, 'text'),
      (TEXT, 'more'), (TEXT, 'text')]
     """

     buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split(split_string)
         for part in parts[:-1]:
             buffer.append((token_class, part))
             yield buffer
             buffer = []

         buffer.append((token_class, parts[-1]))

     if buffer:
         yield buffer


 def filenode_as_annotated_lines_tokens(filenode):
     """
     Take a file node and return a list of annotations => lines, if no annotation
     is found, it will be None.

     eg:

     [
         (annotation1, [
             (1, line1_tokens_list),
             (2, line2_tokens_list),
         ]),
         (annotation2, [
             (3, line1_tokens_list),
         ]),
         (None, [
             (4, line1_tokens_list),
         ]),
         (annotation1, [
             (5, line1_tokens_list),
             (6, line2_tokens_list),
         ])
     ]
     """

     commit_cache = {} # cache commit_getter lookups

     def _get_annotation(commit_id, commit_getter):
         if commit_id not in commit_cache:
             commit_cache[commit_id] = commit_getter()
         return commit_cache[commit_id]

     annotation_lookup = {
         line_no: _get_annotation(commit_id, commit_getter)
         for line_no, commit_id, commit_getter, line_content
         in filenode.annotate
     }

     annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                          for line_no, tokens
                          in enumerate(filenode_as_lines_tokens(filenode), 1))

     grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

     for annotation, group in grouped_annotations_lines:
         yield (
             annotation, [(line_no, tokens)
                          for (_, line_no, tokens) in group]
         )


 def render_tokenstream(tokenstream):
     result = []
     for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

         if token_class:
             result.append(u'<span class="%s">' % token_class)
         else:
             result.append(u'<span>')

         for op_tag, token_text in token_ops_texts:

             if op_tag:
                 result.append(u'<%s>' % op_tag)

             escaped_text = html_escape(token_text)

             # TODO: dan: investigate showing hidden characters like space/nl/tab
             # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
             # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
             # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

             result.append(escaped_text)

             if op_tag:
                 result.append(u'</%s>' % op_tag)

         result.append(u'</span>')

     html = ''.join(result)
     return html


 def rollup_tokenstream(tokenstream):
     """
     Group a token stream of the format:

         ('class', 'op', 'text')
     or
         ('class', 'text')

     into

         [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
          ('class2',
            [('op3', 'text')])]

     This is used to get the minimal tags necessary when
     rendering to html eg for a token stream ie.

     <span class="A"><ins>he</ins>llo</span>
     vs
     <span class="A"><ins>he</ins></span><span class="A">llo</span>

     If a 2 tuple is passed in, the output op will be an empty string.

     eg:

     >>> rollup_tokenstream([('classA', '', 'h'),
                             ('classA', 'del', 'ell'),
                             ('classA', '', 'o'),
                             ('classB', '', ' '),
                             ('classA', '', 'the'),
                             ('classA', '', 're'),
                             ])

     [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
      ('classB', [('', ' ')],
      ('classA', [('', 'there')]]

     """
     if tokenstream and len(tokenstream[0]) == 2:
         tokenstream = ((t[0], '', t[1]) for t in tokenstream)

     result = []
     for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
         ops = []
         for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
             text_buffer = []
             for t_class, t_op, t_text in token_text_list:
                 text_buffer.append(t_text)
             ops.append((token_op, ''.join(text_buffer)))
         result.append((token_class, ops))
     return result


 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
     """
     Converts a list of (token_class, token_text) tuples to a list of
     (token_class, token_op, token_text) tuples where token_op is one of
     ('ins', 'del', '')

     :param old_tokens: list of (token_class, token_text) tuples of old line
     :param new_tokens: list of (token_class, token_text) tuples of new line
     :param use_diff_match_patch: boolean, will use google's diff match patch
         library which has options to 'smooth' out the character by character
         differences making nicer ins/del blocks
     """

     old_tokens_result = []
     new_tokens_result = []

     similarity = difflib.SequenceMatcher(None,
         ''.join(token_text for token_class, token_text in old_tokens),
         ''.join(token_text for token_class, token_text in new_tokens)
     ).ratio()

     if similarity < 0.6: # return, the blocks are too different
         for token_class, token_text in old_tokens:
             old_tokens_result.append((token_class, '', token_text))
         for token_class, token_text in new_tokens:
             new_tokens_result.append((token_class, '', token_text))
         return old_tokens_result, new_tokens_result, similarity

     token_sequence_matcher = difflib.SequenceMatcher(None,
         [x[1] for x in old_tokens],
         [x[1] for x in new_tokens])

     for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
         # check the differences by token block types first to give a more
         # nicer "block" level replacement vs character diffs

         if tag == 'equal':
             for token_class, token_text in old_tokens[o1:o2]:
                 old_tokens_result.append((token_class, '', token_text))
             for token_class, token_text in new_tokens[n1:n2]:
                 new_tokens_result.append((token_class, '', token_text))
         elif tag == 'delete':
             for token_class, token_text in old_tokens[o1:o2]:
                 old_tokens_result.append((token_class, 'del', token_text))
         elif tag == 'insert':
             for token_class, token_text in new_tokens[n1:n2]:
                 new_tokens_result.append((token_class, 'ins', token_text))
         elif tag == 'replace':
             # if same type token blocks must be replaced, do a diff on the
             # characters in the token blocks to show individual changes

             old_char_tokens = []
             new_char_tokens = []
             for token_class, token_text in old_tokens[o1:o2]:
                 for char in token_text:
                     old_char_tokens.append((token_class, char))

             for token_class, token_text in new_tokens[n1:n2]:
                 for char in token_text:
                     new_char_tokens.append((token_class, char))

             old_string = ''.join([token_text for
                 token_class, token_text in old_char_tokens])
             new_string = ''.join([token_text for
                 token_class, token_text in new_char_tokens])

             char_sequence = difflib.SequenceMatcher(
                 None, old_string, new_string)
             copcodes = char_sequence.get_opcodes()
             obuffer, nbuffer = [], []

             if use_diff_match_patch:
                 dmp = diff_match_patch()
                 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
                 reps = dmp.diff_main(old_string, new_string)
                 dmp.diff_cleanupEfficiency(reps)

                 a, b = 0, 0
                 for op, rep in reps:
                     l = len(rep)
                     if op == 0:
                         for i, c in enumerate(rep):
                             obuffer.append((old_char_tokens[a+i][0], '', c))
                             nbuffer.append((new_char_tokens[b+i][0], '', c))
                         a += l
                         b += l
                     elif op == -1:
                         for i, c in enumerate(rep):
                             obuffer.append((old_char_tokens[a+i][0], 'del', c))
                         a += l
                     elif op == 1:
                         for i, c in enumerate(rep):
                             nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                         b += l
             else:
                 for ctag, co1, co2, cn1, cn2 in copcodes:
                     if ctag == 'equal':
                         for token_class, token_text in old_char_tokens[co1:co2]:
                             obuffer.append((token_class, '', token_text))
                         for token_class, token_text in new_char_tokens[cn1:cn2]:
                             nbuffer.append((token_class, '', token_text))
                     elif ctag == 'delete':
                         for token_class, token_text in old_char_tokens[co1:co2]:
                             obuffer.append((token_class, 'del', token_text))
                     elif ctag == 'insert':
                         for token_class, token_text in new_char_tokens[cn1:cn2]:
                             nbuffer.append((token_class, 'ins', token_text))
                     elif ctag == 'replace':
                         for token_class, token_text in old_char_tokens[co1:co2]:
                             obuffer.append((token_class, 'del', token_text))
                         for token_class, token_text in new_char_tokens[cn1:cn2]:
                             nbuffer.append((token_class, 'ins', token_text))

             old_tokens_result.extend(obuffer)
             new_tokens_result.extend(nbuffer)

     return old_tokens_result, new_tokens_result, similarity


 class DiffSet(object):
     """
     An object for parsing the diff result from diffs.DiffProcessor and
     adding highlighting, side by side/unified renderings and line diffs
     """

     HL_REAL = 'REAL' # highlights using original file, slow
     HL_FAST = 'FAST' # highlights using just the line, fast but not correct
                      # in the case of multiline code
     HL_NONE = 'NONE' # no highlighting, fastest

     def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                  source_repo_name=None,
                  source_node_getter=lambda filename: None,
                  target_node_getter=lambda filename: None,
                  source_nodes=None, target_nodes=None,
                  max_file_size_limit=150 * 1024, # files over this size will
                                                  # use fast highlighting
                  comments=None,
                  ):

         self.highlight_mode = highlight_mode
         self.highlighted_filenodes = {}
         self.source_node_getter = source_node_getter
         self.target_node_getter = target_node_getter
         self.source_nodes = source_nodes or {}
         self.target_nodes = target_nodes or {}
         self.repo_name = repo_name
         self.source_repo_name = source_repo_name or repo_name
         self.comments = comments or {}
+        self.comments_store = self.comments.copy()
         self.max_file_size_limit = max_file_size_limit

     def render_patchset(self, patchset, source_ref=None, target_ref=None):
         diffset = AttributeDict(dict(
             lines_added=0,
             lines_deleted=0,
             changed_files=0,
             files=[],
             limited_diff=isinstance(patchset, LimitedDiffContainer),
             repo_name=self.repo_name,
             source_repo_name=self.source_repo_name,
             source_ref=source_ref,
             target_ref=target_ref,
         ))
         for patch in patchset:
             filediff = self.render_patch(patch)
             filediff.diffset = diffset
             diffset.files.append(filediff)
             diffset.changed_files += 1
             if not patch['stats']['binary']:
                 diffset.lines_added += patch['stats']['added']
                 diffset.lines_deleted += patch['stats']['deleted']

         return diffset

     _lexer_cache = {}
     def _get_lexer_for_filename(self, filename):
         # cached because we might need to call it twice for source/target
         if filename not in self._lexer_cache:
             self._lexer_cache[filename] = get_lexer_safe(filepath=filename)
         return self._lexer_cache[filename]

     def render_patch(self, patch):
         log.debug('rendering diff for %r' % patch['filename'])

         source_filename = patch['original_filename']
         target_filename = patch['filename']

         source_lexer = plain_text_lexer
         target_lexer = plain_text_lexer

         if not patch['stats']['binary']:
             if self.highlight_mode == self.HL_REAL:
                 if (source_filename and patch['operation'] in ('D', 'M')
                     and source_filename not in self.source_nodes):
                     self.source_nodes[source_filename] = (
                         self.source_node_getter(source_filename))

                 if (target_filename and patch['operation'] in ('A', 'M')
                     and target_filename not in self.target_nodes):
                     self.target_nodes[target_filename] = (
                         self.target_node_getter(target_filename))

             elif self.highlight_mode == self.HL_FAST:
                 source_lexer = self._get_lexer_for_filename(source_filename)
                 target_lexer = self._get_lexer_for_filename(target_filename)

         source_file = self.source_nodes.get(source_filename, source_filename)
         target_file = self.target_nodes.get(target_filename, target_filename)

         source_filenode, target_filenode = None, None

         # TODO: dan: FileNode.lexer works on the content of the file - which
         # can be slow - issue #4289 explains a lexer clean up - which once
         # done can allow caching a lexer for a filenode to avoid the file lookup
         if isinstance(source_file, FileNode):
             source_filenode = source_file
             source_lexer = source_file.lexer
         if isinstance(target_file, FileNode):
             target_filenode = target_file
             target_lexer = target_file.lexer

         source_file_path, target_file_path = None, None

         if source_filename != '/dev/null':
             source_file_path = source_filename
         if target_filename != '/dev/null':
             target_file_path = target_filename

         source_file_type = source_lexer.name
         target_file_type = target_lexer.name

         op_hunks = patch['chunks'][0]
         hunks = patch['chunks'][1:]

         filediff = AttributeDict({
             'source_file_path': source_file_path,
             'target_file_path': target_file_path,
             'source_filenode': source_filenode,
             'target_filenode': target_filenode,
             'hunks': [],
             'source_file_type': target_file_type,
             'target_file_type': source_file_type,
             'patch': patch,
             'source_mode': patch['stats']['old_mode'],
             'target_mode': patch['stats']['new_mode'],
             'limited_diff': isinstance(patch, LimitedDiffContainer),
             'diffset': self,
         })

         for hunk in hunks:
             hunkbit = self.parse_hunk(hunk, source_file, target_file)
             hunkbit.filediff = filediff
             filediff.hunks.append(hunkbit)
+
+        left_comments = {}
+
+        if source_file_path in self.comments_store:
+            for lineno, comments in self.comments_store[source_file_path].items():
+                left_comments[lineno] = comments
+
+        if target_file_path in self.comments_store:
+            for lineno, comments in self.comments_store[target_file_path].items():
+                left_comments[lineno] = comments
+
+        filediff.left_comments = left_comments
         return filediff

     def parse_hunk(self, hunk, source_file, target_file):
         result = AttributeDict(dict(
             source_start=hunk['source_start'],
             source_length=hunk['source_length'],
             target_start=hunk['target_start'],
             target_length=hunk['target_length'],
             section_header=hunk['section_header'],
             lines=[],
         ))
         before, after = [], []

         for line in hunk['lines']:
             if line['action'] == 'unmod':
                 result.lines.extend(
                     self.parse_lines(before, after, source_file, target_file))
                 after.append(line)
                 before.append(line)
             elif line['action'] == 'add':
                 after.append(line)
             elif line['action'] == 'del':
                 before.append(line)
             elif line['action'] == 'old-no-nl':
                 before.append(line)
             elif line['action'] == 'new-no-nl':
                 after.append(line)

         result.lines.extend(
             self.parse_lines(before, after, source_file, target_file))
         result.unified = self.as_unified(result.lines)
         result.sideside = result.lines
+
         return result

     def parse_lines(self, before_lines, after_lines, source_file, target_file):
         # TODO: dan: investigate doing the diff comparison and fast highlighting
         # on the entire before and after buffered block lines rather than by
         # line, this means we can get better 'fast' highlighting if the context
         # allows it - eg.
         # line 4: """
         # line 5: this gets highlighted as a string
         # line 6: """

         lines = []
         while before_lines or after_lines:
             before, after = None, None
             before_tokens, after_tokens = None, None

             if before_lines:
                 before = before_lines.pop(0)
             if after_lines:
                 after = after_lines.pop(0)

             original = AttributeDict()
             modified = AttributeDict()

             if before:
                 if before['action'] == 'old-no-nl':
                     before_tokens = [('nonl', before['line'])]
                 else:
                     before_tokens = self.get_line_tokens(
                         line_text=before['line'], line_number=before['old_lineno'],
                         file=source_file)
                 original.lineno = before['old_lineno']
                 original.content = before['line']
                 original.action = self.action_to_op(before['action'])
                 original.comments = self.get_comments_for('old',
                     source_file, before['old_lineno'])

             if after:
                 if after['action'] == 'new-no-nl':
                     after_tokens = [('nonl', after['line'])]
                 else:
                     after_tokens = self.get_line_tokens(
                         line_text=after['line'], line_number=after['new_lineno'],
                         file=target_file)
                 modified.lineno = after['new_lineno']
                 modified.content = after['line']
                 modified.action = self.action_to_op(after['action'])
                 modified.comments = self.get_comments_for('new',
                     target_file, after['new_lineno'])

             # diff the lines
             if before_tokens and after_tokens:
                 o_tokens, m_tokens, similarity = tokens_diff(
                     before_tokens, after_tokens)
                 original.content = render_tokenstream(o_tokens)
                 modified.content = render_tokenstream(m_tokens)
             elif before_tokens:
                 original.content = render_tokenstream(
                     [(x[0], '', x[1]) for x in before_tokens])
             elif after_tokens:
                 modified.content = render_tokenstream(
                     [(x[0], '', x[1]) for x in after_tokens])

             lines.append(AttributeDict({
                 'original': original,
                 'modified': modified,
             }))

         return lines

     def get_comments_for(self, version, file, line_number):
         if hasattr(file, 'unicode_path'):
             file = file.unicode_path

         if not isinstance(file, basestring):
             return None

         line_key = {
             'old': 'o',
             'new': 'n',
         }[version] + str(line_number)

-        return self.comments.get(file, {}).get(line_key)
+        if file in self.comments_store:
+            file_comments = self.comments_store[file]
+            if line_key in file_comments:
+                return file_comments.pop(line_key)

     def get_line_tokens(self, line_text, line_number, file=None):
         filenode = None
         filename = None

         if isinstance(file, basestring):
             filename = file
         elif isinstance(file, FileNode):
             filenode = file
             filename = file.unicode_path

         if self.highlight_mode == self.HL_REAL and filenode:
             if line_number and file.size < self.max_file_size_limit:
                 return self.get_tokenized_filenode_line(file, line_number)

         if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
             lexer = self._get_lexer_for_filename(filename)
             return list(tokenize_string(line_text, lexer))

         return list(tokenize_string(line_text, plain_text_lexer))

     def get_tokenized_filenode_line(self, filenode, line_number):

         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
         return self.highlighted_filenodes[filenode][line_number - 1]

     def action_to_op(self, action):
         return {
             'add': '+',
             'del': '-',
             'unmod': ' ',
             'old-no-nl': ' ',
             'new-no-nl': ' ',
         }.get(action, action)

     def as_unified(self, lines):
-        """ Return a generator that yields the lines of a diff in unified order """
+        """
+        Return a generator that yields the lines of a diff in unified order
+        """
         def generator():
             buf = []
             for line in lines:

                 if buf and not line.original or line.original.action == ' ':
                     for b in buf:
                         yield b
                     buf = []

                 if line.original:
                     if line.original.action == ' ':
                         yield (line.original.lineno, line.modified.lineno,
                                line.original.action, line.original.content,
                                line.original.comments)
                         continue

                     if line.original.action == '-':
                         yield (line.original.lineno, None,
                                line.original.action, line.original.content,
                                line.original.comments)

                     if line.modified.action == '+':
                         buf.append((
                             None, line.modified.lineno,
                             line.modified.action, line.modified.content,
                             line.modified.comments))
                         continue

                 if line.modified:
                     yield (None, line.modified.lineno,
                            line.modified.action, line.modified.content,
                            line.modified.comments)

             for b in buf:
                 yield b

         return generator()
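
A minimal sketch of the behaviour this change introduces, assuming the module path rhodecode.lib.codeblocks and a simple list payload for comments (both are illustrative assumptions, not part of the commit): get_comments_for() pops rendered comments out of comments_store, and render_patch() sweeps whatever is left into filediff.left_comments so it can be shown as outdated.

# Minimal sketch, assuming rhodecode.lib.codeblocks as the module path and
# plain lists as comment payloads; both are assumptions for illustration.
from rhodecode.lib.codeblocks import DiffSet

# comments are keyed by file path, then by 'o<old_lineno>' / 'n<new_lineno>',
# matching the line_key built in get_comments_for()
comments = {
    u'setup.py': {
        'n10': ['comment attached to new line 10'],
        'n999': ['comment on a line the new diff no longer contains'],
    },
}

diffset = DiffSet(comments=comments)

# While hunks are rendered, get_comments_for() pops each consumed entry out of
# diffset.comments_store (a copy of the incoming mapping)...
rendered = diffset.get_comments_for('new', u'setup.py', 10)  # -> the 'n10' list

# ...so whatever remains for the file is what render_patch() collects into
# filediff.left_comments, to be displayed as outdated comments.
leftover = diffset.comments_store[u'setup.py']  # -> {'n999': [...]}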
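For context on the per-line highlight pipeline this class drives, a small sketch of tokens_diff() feeding render_tokenstream(); the module path is again assumed, and 'k'/'nf' are just plausible pygments CSS classes standing in for real tokenize_string() output.

# Minimal sketch, assuming the module path rhodecode.lib.codeblocks; the
# (css_class, text) tuples mimic what tokenize_string() yields for one line.
from rhodecode.lib.codeblocks import tokens_diff, render_tokenstream

old_line = [('k', u'def'), ('', u' '), ('nf', u'render')]
new_line = [('k', u'def'), ('', u' '), ('nf', u'render_patch')]

# tokens_diff() upgrades (class, text) pairs to (class, op, text) triples,
# with op in ('', 'del', 'ins'), plus a similarity ratio for the two lines.
old_tokens, new_tokens, similarity = tokens_diff(old_line, new_line)

# render_tokenstream() rolls consecutive tokens of one class into a single
# <span>, wrapping inserted/deleted runs in <ins>/<del>; roughly:
#   <span class="k">def</span><span> </span><span class="nf">render<ins>_patch</ins></span>
print(render_tokenstream(new_tokens))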