ux: make 'no newline at end of file' message more pronounced in diffs

Author: dan
Revision: r1032:ab6082d0 (default branch)
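The change below gives the '\ No newline at end of file' marker its own line
actions ('old-no-nl'/'new-no-nl' instead of the generic
'context-old'/'context-new') and stops running the marker text through a
lexer: parse_lines() now emits a single pseudo-token with the class 'nonl',
so the stylesheet can render the message distinctly. A minimal sketch of the
rendering path, using render_tokenstream() from the first file below (the
sample token text is illustrative):

    # The marker line becomes one ('nonl', text) token instead of being
    # syntax highlighted; render_tokenstream() accepts 2-tuples and wraps
    # the text in a span carrying the token class.
    marker_tokens = [('nonl', ' No newline at end of file')]
    html = render_tokenstream(marker_tokens)
    # -> u'<span class="nonl"> No newline at end of file</span>'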
@@ -1,635 +1,641 @@
 # -*- coding: utf-8 -*-

 # Copyright (C) 2011-2016 RhodeCode GmbH
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License, version 3
 # (only), as published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 # This program is dual-licensed. If you wish to learn more about the
 # RhodeCode Enterprise Edition, including its added features, Support services,
 # and proprietary license terms, please see https://rhodecode.com/licenses/

 import logging
 import difflib
 from itertools import groupby

 from pygments import lex
 from pygments.formatters.html import _get_ttype_class as pygment_token_class
 from rhodecode.lib.helpers import (
     get_lexer_for_filenode, get_lexer_safe, html_escape)
 from rhodecode.lib.utils2 import AttributeDict
 from rhodecode.lib.vcs.nodes import FileNode
 from rhodecode.lib.diff_match_patch import diff_match_patch
 from rhodecode.lib.diffs import LimitedDiffContainer
 from pygments.lexers import get_lexer_by_name

 plain_text_lexer = get_lexer_by_name(
     'text', stripall=False, stripnl=False, ensurenl=False)


 log = logging.getLogger()


 def filenode_as_lines_tokens(filenode, lexer=None):
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
     tokens = tokenize_string(filenode.content, lexer)
     lines = split_token_stream(tokens, split_string='\n')
     rv = list(lines)
     return rv


 def tokenize_string(content, lexer):
     """
     Use pygments to tokenize some content based on a lexer
     ensuring all original new lines and whitespace is preserved
     """

     lexer.stripall = False
     lexer.stripnl = False
     lexer.ensurenl = False
     for token_type, token_text in lex(content, lexer):
         yield pygment_token_class(token_type), token_text


 def split_token_stream(tokens, split_string=u'\n'):
     """
     Take a list of (TokenType, text) tuples and split them by a string

     >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
     [(TEXT, 'some'), (TEXT, 'text'),
      (TEXT, 'more'), (TEXT, 'text')]
     """

     buffer = []
     for token_class, token_text in tokens:
         parts = token_text.split(split_string)
         for part in parts[:-1]:
             buffer.append((token_class, part))
             yield buffer
             buffer = []

         buffer.append((token_class, parts[-1]))

     if buffer:
         yield buffer


 def filenode_as_annotated_lines_tokens(filenode):
     """
     Take a file node and return a list of annotations => lines, if no annotation
     is found, it will be None.

     eg:

     [
         (annotation1, [
             (1, line1_tokens_list),
             (2, line2_tokens_list),
         ]),
         (annotation2, [
             (3, line1_tokens_list),
         ]),
         (None, [
             (4, line1_tokens_list),
         ]),
         (annotation1, [
             (5, line1_tokens_list),
             (6, line2_tokens_list),
         ])
     ]
     """

     commit_cache = {}  # cache commit_getter lookups

     def _get_annotation(commit_id, commit_getter):
         if commit_id not in commit_cache:
             commit_cache[commit_id] = commit_getter()
         return commit_cache[commit_id]

     annotation_lookup = {
         line_no: _get_annotation(commit_id, commit_getter)
         for line_no, commit_id, commit_getter, line_content
         in filenode.annotate
     }

     annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                          for line_no, tokens
                          in enumerate(filenode_as_lines_tokens(filenode), 1))

     grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

     for annotation, group in grouped_annotations_lines:
         yield (
             annotation, [(line_no, tokens)
                          for (_, line_no, tokens) in group]
         )


 def render_tokenstream(tokenstream):
     result = []
     for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

         if token_class:
             result.append(u'<span class="%s">' % token_class)
         else:
             result.append(u'<span>')

         for op_tag, token_text in token_ops_texts:

             if op_tag:
                 result.append(u'<%s>' % op_tag)

             escaped_text = html_escape(token_text)

             # TODO: dan: investigate showing hidden characters like space/nl/tab
             # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
             # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
             # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

             result.append(escaped_text)

             if op_tag:
                 result.append(u'</%s>' % op_tag)

         result.append(u'</span>')

     html = ''.join(result)
     return html


 def rollup_tokenstream(tokenstream):
     """
     Group a token stream of the format:

         ('class', 'op', 'text')
     or
         ('class', 'text')

     into

     [('class1',
         [('op1', 'text'),
          ('op2', 'text')]),
      ('class2',
         [('op3', 'text')])]

     This is used to get the minimal tags necessary when
     rendering to html eg for a token stream ie.

     <span class="A"><ins>he</ins>llo</span>
     vs
     <span class="A"><ins>he</ins></span><span class="A">llo</span>

     If a 2 tuple is passed in, the output op will be an empty string.

     eg:

     >>> rollup_tokenstream([('classA', '', 'h'),
                             ('classA', 'del', 'ell'),
                             ('classA', '', 'o'),
                             ('classB', '', ' '),
                             ('classA', '', 'the'),
                             ('classA', '', 're'),
                             ])

     [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
      ('classB', [('', ' ')],
      ('classA', [('', 'there')]]

     """
     if tokenstream and len(tokenstream[0]) == 2:
         tokenstream = ((t[0], '', t[1]) for t in tokenstream)

     result = []
     for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
         ops = []
         for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
             text_buffer = []
             for t_class, t_op, t_text in token_text_list:
                 text_buffer.append(t_text)
             ops.append((token_op, ''.join(text_buffer)))
         result.append((token_class, ops))
     return result


 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
     """
     Converts a list of (token_class, token_text) tuples to a list of
     (token_class, token_op, token_text) tuples where token_op is one of
     ('ins', 'del', '')

     :param old_tokens: list of (token_class, token_text) tuples of old line
     :param new_tokens: list of (token_class, token_text) tuples of new line
     :param use_diff_match_patch: boolean, will use google's diff match patch
         library which has options to 'smooth' out the character by character
         differences making nicer ins/del blocks
     """

     old_tokens_result = []
     new_tokens_result = []

     similarity = difflib.SequenceMatcher(None,
         ''.join(token_text for token_class, token_text in old_tokens),
         ''.join(token_text for token_class, token_text in new_tokens)
     ).ratio()

     if similarity < 0.6:  # return, the blocks are too different
         for token_class, token_text in old_tokens:
             old_tokens_result.append((token_class, '', token_text))
         for token_class, token_text in new_tokens:
             new_tokens_result.append((token_class, '', token_text))
         return old_tokens_result, new_tokens_result, similarity

     token_sequence_matcher = difflib.SequenceMatcher(None,
         [x[1] for x in old_tokens],
         [x[1] for x in new_tokens])

     for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
         # check the differences by token block types first to give a more
         # nicer "block" level replacement vs character diffs

         if tag == 'equal':
             for token_class, token_text in old_tokens[o1:o2]:
                 old_tokens_result.append((token_class, '', token_text))
             for token_class, token_text in new_tokens[n1:n2]:
                 new_tokens_result.append((token_class, '', token_text))
         elif tag == 'delete':
             for token_class, token_text in old_tokens[o1:o2]:
                 old_tokens_result.append((token_class, 'del', token_text))
         elif tag == 'insert':
             for token_class, token_text in new_tokens[n1:n2]:
                 new_tokens_result.append((token_class, 'ins', token_text))
         elif tag == 'replace':
             # if same type token blocks must be replaced, do a diff on the
             # characters in the token blocks to show individual changes

             old_char_tokens = []
             new_char_tokens = []
             for token_class, token_text in old_tokens[o1:o2]:
                 for char in token_text:
                     old_char_tokens.append((token_class, char))

             for token_class, token_text in new_tokens[n1:n2]:
                 for char in token_text:
                     new_char_tokens.append((token_class, char))

             old_string = ''.join([token_text for
                 token_class, token_text in old_char_tokens])
             new_string = ''.join([token_text for
                 token_class, token_text in new_char_tokens])

             char_sequence = difflib.SequenceMatcher(
                 None, old_string, new_string)
             copcodes = char_sequence.get_opcodes()
             obuffer, nbuffer = [], []

             if use_diff_match_patch:
                 dmp = diff_match_patch()
                 dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                 reps = dmp.diff_main(old_string, new_string)
                 dmp.diff_cleanupEfficiency(reps)

                 a, b = 0, 0
                 for op, rep in reps:
                     l = len(rep)
                     if op == 0:
                         for i, c in enumerate(rep):
                             obuffer.append((old_char_tokens[a+i][0], '', c))
                             nbuffer.append((new_char_tokens[b+i][0], '', c))
                         a += l
                         b += l
                     elif op == -1:
                         for i, c in enumerate(rep):
                             obuffer.append((old_char_tokens[a+i][0], 'del', c))
                         a += l
                     elif op == 1:
                         for i, c in enumerate(rep):
                             nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                         b += l
             else:
                 for ctag, co1, co2, cn1, cn2 in copcodes:
                     if ctag == 'equal':
                         for token_class, token_text in old_char_tokens[co1:co2]:
                             obuffer.append((token_class, '', token_text))
                         for token_class, token_text in new_char_tokens[cn1:cn2]:
                             nbuffer.append((token_class, '', token_text))
                     elif ctag == 'delete':
                         for token_class, token_text in old_char_tokens[co1:co2]:
                             obuffer.append((token_class, 'del', token_text))
                     elif ctag == 'insert':
                         for token_class, token_text in new_char_tokens[cn1:cn2]:
                             nbuffer.append((token_class, 'ins', token_text))
                     elif ctag == 'replace':
                         for token_class, token_text in old_char_tokens[co1:co2]:
                             obuffer.append((token_class, 'del', token_text))
                         for token_class, token_text in new_char_tokens[cn1:cn2]:
                             nbuffer.append((token_class, 'ins', token_text))

             old_tokens_result.extend(obuffer)
             new_tokens_result.extend(nbuffer)

     return old_tokens_result, new_tokens_result, similarity


 class DiffSet(object):
     """
     An object for parsing the diff result from diffs.DiffProcessor and
     adding highlighting, side by side/unified renderings and line diffs
     """

     HL_REAL = 'REAL' # highlights using original file, slow
     HL_FAST = 'FAST' # highlights using just the line, fast but not correct
                      # in the case of multiline code
     HL_NONE = 'NONE' # no highlighting, fastest

     def __init__(self, highlight_mode=HL_REAL,
                  source_node_getter=lambda filename: None,
                  target_node_getter=lambda filename: None,
                  source_nodes=None, target_nodes=None,
                  max_file_size_limit=150 * 1024, # files over this size will
                                                  # use fast highlighting
                  ):

         self.highlight_mode = highlight_mode
         self.highlighted_filenodes = {}
         self.source_node_getter = source_node_getter
         self.target_node_getter = target_node_getter
         self.source_nodes = source_nodes or {}
         self.target_nodes = target_nodes or {}


         self.max_file_size_limit = max_file_size_limit

     def render_patchset(self, patchset, source_ref=None, target_ref=None):
         diffset = AttributeDict(dict(
             lines_added=0,
             lines_deleted=0,
             changed_files=0,
             files=[],
             limited_diff=isinstance(patchset, LimitedDiffContainer),
             source_ref=source_ref,
             target_ref=target_ref,
         ))
         for patch in patchset:
             filediff = self.render_patch(patch)
             filediff.diffset = diffset
             diffset.files.append(filediff)
             diffset.changed_files += 1
             if not patch['stats']['binary']:
                 diffset.lines_added += patch['stats']['added']
                 diffset.lines_deleted += patch['stats']['deleted']

         return diffset

     _lexer_cache = {}
     def _get_lexer_for_filename(self, filename):
         # cached because we might need to call it twice for source/target
         if filename not in self._lexer_cache:
             self._lexer_cache[filename] = get_lexer_safe(filepath=filename)
         return self._lexer_cache[filename]

     def render_patch(self, patch):
         log.debug('rendering diff for %r' % patch['filename'])

         source_filename = patch['original_filename']
         target_filename = patch['filename']

         source_lexer = plain_text_lexer
         target_lexer = plain_text_lexer

         if not patch['stats']['binary']:
             if self.highlight_mode == self.HL_REAL:
                 if (source_filename and patch['operation'] in ('D', 'M')
                     and source_filename not in self.source_nodes):
                     self.source_nodes[source_filename] = (
                         self.source_node_getter(source_filename))

                 if (target_filename and patch['operation'] in ('A', 'M')
                     and target_filename not in self.target_nodes):
                     self.target_nodes[target_filename] = (
                         self.target_node_getter(target_filename))

             elif self.highlight_mode == self.HL_FAST:
                 source_lexer = self._get_lexer_for_filename(source_filename)
                 target_lexer = self._get_lexer_for_filename(target_filename)

         source_file = self.source_nodes.get(source_filename, source_filename)
         target_file = self.target_nodes.get(target_filename, target_filename)

         source_filenode, target_filenode = None, None

         # TODO: dan: FileNode.lexer works on the content of the file - which
         # can be slow - issue #4289 explains a lexer clean up - which once
         # done can allow caching a lexer for a filenode to avoid the file lookup
         if isinstance(source_file, FileNode):
             source_filenode = source_file
             source_lexer = source_file.lexer
         if isinstance(target_file, FileNode):
             target_filenode = target_file
             target_lexer = target_file.lexer

         source_file_path, target_file_path = None, None

         if source_filename != '/dev/null':
             source_file_path = source_filename
         if target_filename != '/dev/null':
             target_file_path = target_filename

         source_file_type = source_lexer.name
         target_file_type = target_lexer.name

         op_hunks = patch['chunks'][0]
         hunks = patch['chunks'][1:]

         filediff = AttributeDict({
             'source_file_path': source_file_path,
             'target_file_path': target_file_path,
             'source_filenode': source_filenode,
             'target_filenode': target_filenode,
             'hunks': [],
             'source_file_type': target_file_type,
             'target_file_type': source_file_type,
             'patch': patch,
             'source_mode': patch['stats']['old_mode'],
             'target_mode': patch['stats']['new_mode'],
             'limited_diff': isinstance(patch, LimitedDiffContainer),
             'diffset': self,
         })

         for hunk in hunks:
             hunkbit = self.parse_hunk(hunk, source_file, target_file)
             hunkbit.filediff = filediff
             filediff.hunks.append(hunkbit)
         return filediff

     def parse_hunk(self, hunk, source_file, target_file):
         result = AttributeDict(dict(
             source_start=hunk['source_start'],
             source_length=hunk['source_length'],
             target_start=hunk['target_start'],
             target_length=hunk['target_length'],
             section_header=hunk['section_header'],
             lines=[],
         ))
         before, after = [], []

         for line in hunk['lines']:
             if line['action'] == 'unmod':
                 result.lines.extend(
                     self.parse_lines(before, after, source_file, target_file))
                 after.append(line)
                 before.append(line)
             elif line['action'] == 'add':
                 after.append(line)
             elif line['action'] == 'del':
                 before.append(line)
-            elif line['action'] == 'context-old':
+            elif line['action'] == 'old-no-nl':
                 before.append(line)
-            elif line['action'] == 'context-new':
+            elif line['action'] == 'new-no-nl':
                 after.append(line)
 
         result.lines.extend(
             self.parse_lines(before, after, source_file, target_file))
         result.unified = self.as_unified(result.lines)
         result.sideside = result.lines
         return result

     def parse_lines(self, before_lines, after_lines, source_file, target_file):
         # TODO: dan: investigate doing the diff comparison and fast highlighting
         # on the entire before and after buffered block lines rather than by
         # line, this means we can get better 'fast' highlighting if the context
         # allows it - eg.
         # line 4: """
         # line 5: this gets highlighted as a string
         # line 6: """

         lines = []
         while before_lines or after_lines:
             before, after = None, None
             before_tokens, after_tokens = None, None

             if before_lines:
                 before = before_lines.pop(0)
             if after_lines:
                 after = after_lines.pop(0)

             original = AttributeDict()
             modified = AttributeDict()

             if before:
-                before_tokens = self.get_line_tokens(
-                    line_text=before['line'], line_number=before['old_lineno'],
-                    file=source_file)
+                if before['action'] == 'old-no-nl':
+                    before_tokens = [('nonl', before['line'])]
+                else:
+                    before_tokens = self.get_line_tokens(
+                        line_text=before['line'], line_number=before['old_lineno'],
+                        file=source_file)
                 original.lineno = before['old_lineno']
                 original.content = before['line']
                 original.action = self.action_to_op(before['action'])

             if after:
-                after_tokens = self.get_line_tokens(
-                    line_text=after['line'], line_number=after['new_lineno'],
-                    file=target_file)
+                if after['action'] == 'new-no-nl':
+                    after_tokens = [('nonl', after['line'])]
+                else:
+                    after_tokens = self.get_line_tokens(
+                        line_text=after['line'], line_number=after['new_lineno'],
+                        file=target_file)
                 modified.lineno = after['new_lineno']
                 modified.content = after['line']
                 modified.action = self.action_to_op(after['action'])
 
-
             # diff the lines
             if before_tokens and after_tokens:
-                o_tokens, m_tokens, similarity = tokens_diff(before_tokens, after_tokens)
+                o_tokens, m_tokens, similarity = tokens_diff(
+                    before_tokens, after_tokens)
                 original.content = render_tokenstream(o_tokens)
                 modified.content = render_tokenstream(m_tokens)
             elif before_tokens:
                 original.content = render_tokenstream(
                     [(x[0], '', x[1]) for x in before_tokens])
             elif after_tokens:
                 modified.content = render_tokenstream(
                     [(x[0], '', x[1]) for x in after_tokens])

             lines.append(AttributeDict({
                 'original': original,
                 'modified': modified,
             }))

         return lines

     def get_line_tokens(self, line_text, line_number, file=None):
         filenode = None
         filename = None

         if isinstance(file, basestring):
             filename = file
         elif isinstance(file, FileNode):
             filenode = file
             filename = file.unicode_path

         if self.highlight_mode == self.HL_REAL and filenode:
             if line_number and file.size < self.max_file_size_limit:
                 return self.get_tokenized_filenode_line(file, line_number)

         if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
             lexer = self._get_lexer_for_filename(filename)
             return list(tokenize_string(line_text, lexer))

         return list(tokenize_string(line_text, plain_text_lexer))

     def get_tokenized_filenode_line(self, filenode, line_number):

         if filenode not in self.highlighted_filenodes:
             tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer)
             self.highlighted_filenodes[filenode] = tokenized_lines
         return self.highlighted_filenodes[filenode][line_number - 1]

     def action_to_op(self, action):
         return {
             'add': '+',
             'del': '-',
             'unmod': ' ',
-            'context-old': ' ',
-            'context-new': ' ',
+            'old-no-nl': ' ',
+            'new-no-nl': ' ',
         }.get(action, action)

     def as_unified(self, lines):
         """ Return a generator that yields the lines of a diff in unified order """
         def generator():
             buf = []
             for line in lines:

                 if buf and not line.original or line.original.action == ' ':
                     for b in buf:
                         yield b
                     buf = []

                 if line.original:
                     if line.original.action == ' ':
                         yield (line.original.lineno, line.modified.lineno,
                                line.original.action, line.original.content)
                         continue

                     if line.original.action == '-':
                         yield (line.original.lineno, None,
                                line.original.action, line.original.content)

                     if line.modified.action == '+':
                         buf.append((
                             None, line.modified.lineno,
                             line.modified.action, line.modified.content))
                         continue

                 if line.modified:
                     yield (None, line.modified.lineno,
                            line.modified.action, line.modified.content)

             for b in buf:
                 yield b

         return generator()
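The second file renames the matching Action constants in rhodecode.lib.diffs.
A hedged sketch of how the parsing side and these constants line up (the
helper function is illustrative, not part of the change; the constant names
are taken from the hunk below):

    # Lines flagged by the '\ No newline at end of file' marker now sort
    # into the before/after buffers under their own action names, mirroring
    # what parse_hunk() above does with the raw string values.
    def sort_line(line, before, after):
        action = line['action']
        if action == Action.UNMODIFIED:
            before.append(line)
            after.append(line)
        elif action in (Action.DELETE, Action.OLD_NO_NL):
            before.append(line)
        elif action in (Action.ADD, Action.NEW_NO_NL):
            after.append(line)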
@@ -1,1161 +1,1161 @@
 # -*- coding: utf-8 -*-

 # Copyright (C) 2011-2016 RhodeCode GmbH
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License, version 3
 # (only), as published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 # This program is dual-licensed. If you wish to learn more about the
 # RhodeCode Enterprise Edition, including its added features, Support services,
 # and proprietary license terms, please see https://rhodecode.com/licenses/


 """
 Set of diffing helpers, previously part of vcs
 """

 import collections
 import re
 import difflib
 import logging

 from itertools import tee, imap

 from pylons.i18n.translation import _

 from rhodecode.lib.vcs.exceptions import VCSError
 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
 from rhodecode.lib.vcs.backends.base import EmptyCommit
 from rhodecode.lib.helpers import escape
 from rhodecode.lib.utils2 import safe_unicode

 log = logging.getLogger(__name__)

 # define max context, a file with more than this numbers of lines is unusable
 # in browser anyway
 MAX_CONTEXT = 1024 * 1014


 class OPS(object):
     ADD = 'A'
     MOD = 'M'
     DEL = 'D'


 def wrap_to_table(str_):
     return '''<table class="code-difftable">
                 <tr class="line no-comment">
                 <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
                 <td></td>
                 <td class="lineno new"></td>
                 <td class="code no-comment"><pre>%s</pre></td>
                 </tr>
               </table>''' % (_('Click to comment'), str_)


 def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                  show_full_diff=False, ignore_whitespace=True, line_context=3,
                  enable_comments=False):
     """
     returns a wrapped diff into a table, checks for cut_off_limit for file and
     whole diff and presents proper message
     """

     if filenode_old is None:
         filenode_old = FileNode(filenode_new.path, '', EmptyCommit())

     if filenode_old.is_binary or filenode_new.is_binary:
         diff = wrap_to_table(_('Binary file'))
         stats = None
         size = 0
         data = None

     elif diff_limit != -1 and (diff_limit is None or
         (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):

         f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                 ignore_whitespace=ignore_whitespace,
                                 context=line_context)
         diff_processor = DiffProcessor(
             f_gitdiff, format='gitdiff', diff_limit=diff_limit,
             file_limit=file_limit, show_full_diff=show_full_diff)
         _parsed = diff_processor.prepare()

         diff = diff_processor.as_html(enable_comments=enable_comments)
         stats = _parsed[0]['stats'] if _parsed else None
         size = len(diff or '')
         data = _parsed[0] if _parsed else None
     else:
         diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                'diff menu to display this diff'))
         stats = None
         size = 0
         data = None
     if not diff:
         submodules = filter(lambda o: isinstance(o, SubModuleNode),
                             [filenode_new, filenode_old])
         if submodules:
             diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
         else:
             diff = wrap_to_table(_('No changes detected'))

     cs1 = filenode_old.commit.raw_id
     cs2 = filenode_new.commit.raw_id

     return size, cs1, cs2, diff, stats, data


 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
     """
     Returns git style diff between given ``filenode_old`` and ``filenode_new``.

     :param ignore_whitespace: ignore whitespaces in diff
     """
     # make sure we pass in default context
     context = context or 3
     # protect against IntOverflow when passing HUGE context
     if context > MAX_CONTEXT:
         context = MAX_CONTEXT

     submodules = filter(lambda o: isinstance(o, SubModuleNode),
                         [filenode_new, filenode_old])
     if submodules:
         return ''

     for filenode in (filenode_old, filenode_new):
         if not isinstance(filenode, FileNode):
             raise VCSError(
                 "Given object should be FileNode object, not %s"
                 % filenode.__class__)

     repo = filenode_new.commit.repository
     old_commit = filenode_old.commit or repo.EMPTY_COMMIT
     new_commit = filenode_new.commit

     vcs_gitdiff = repo.get_diff(
         old_commit, new_commit, filenode_new.path,
         ignore_whitespace, context, path1=filenode_old.path)
     return vcs_gitdiff

 NEW_FILENODE = 1
 DEL_FILENODE = 2
 MOD_FILENODE = 3
 RENAMED_FILENODE = 4
 COPIED_FILENODE = 5
 CHMOD_FILENODE = 6
 BIN_FILENODE = 7


 class LimitedDiffContainer(object):

     def __init__(self, diff_limit, cur_diff_size, diff):
         self.diff = diff
         self.diff_limit = diff_limit
         self.cur_diff_size = cur_diff_size

     def __getitem__(self, key):
         return self.diff.__getitem__(key)

     def __iter__(self):
         for l in self.diff:
             yield l


 class Action(object):
     """
     Contains constants for the action value of the lines in a parsed diff.
     """

     ADD = 'add'
     DELETE = 'del'
     UNMODIFIED = 'unmod'

     CONTEXT = 'context'
-    CONTEXT_OLD = 'context-old'
-    CONTEXT_NEW = 'context-new'
+    OLD_NO_NL = 'old-no-nl'
+    NEW_NO_NL = 'new-no-nl'
185
185
186
186
187 class DiffProcessor(object):
187 class DiffProcessor(object):
188 """
188 """
189 Give it a unified or git diff and it returns a list of the files that were
189 Give it a unified or git diff and it returns a list of the files that were
190 mentioned in the diff together with a dict of meta information that
190 mentioned in the diff together with a dict of meta information that
191 can be used to render it in a HTML template.
191 can be used to render it in a HTML template.
192
192
193 .. note:: Unicode handling
193 .. note:: Unicode handling
194
194
195 The original diffs are a byte sequence and can contain filenames
195 The original diffs are a byte sequence and can contain filenames
196 in mixed encodings. This class generally returns `unicode` objects
196 in mixed encodings. This class generally returns `unicode` objects
197 since the result is intended for presentation to the user.
197 since the result is intended for presentation to the user.
198
198
199 """
199 """
200 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
200 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
201 _newline_marker = re.compile(r'^\\ No newline at end of file')
201 _newline_marker = re.compile(r'^\\ No newline at end of file')
202
202
203 # used for inline highlighter word split
203 # used for inline highlighter word split
204 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
204 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
205
205
206 def __init__(self, diff, format='gitdiff', diff_limit=None,
206 def __init__(self, diff, format='gitdiff', diff_limit=None,
207 file_limit=None, show_full_diff=True):
207 file_limit=None, show_full_diff=True):
208 """
208 """
209 :param diff: A `Diff` object representing a diff from a vcs backend
209 :param diff: A `Diff` object representing a diff from a vcs backend
210 :param format: format of diff passed, `udiff` or `gitdiff`
210 :param format: format of diff passed, `udiff` or `gitdiff`
211 :param diff_limit: define the size of diff that is considered "big"
211 :param diff_limit: define the size of diff that is considered "big"
212 based on that parameter cut off will be triggered, set to None
212 based on that parameter cut off will be triggered, set to None
213 to show full diff
213 to show full diff
214 """
214 """
215 self._diff = diff
215 self._diff = diff
216 self._format = format
216 self._format = format
217 self.adds = 0
217 self.adds = 0
218 self.removes = 0
218 self.removes = 0
219 # calculate diff size
219 # calculate diff size
220 self.diff_limit = diff_limit
220 self.diff_limit = diff_limit
221 self.file_limit = file_limit
221 self.file_limit = file_limit
222 self.show_full_diff = show_full_diff
222 self.show_full_diff = show_full_diff
223 self.cur_diff_size = 0
223 self.cur_diff_size = 0
224 self.parsed = False
224 self.parsed = False
225 self.parsed_diff = []
225 self.parsed_diff = []
226
226
227 if format == 'gitdiff':
227 if format == 'gitdiff':
228 self.differ = self._highlight_line_difflib
228 self.differ = self._highlight_line_difflib
229 self._parser = self._parse_gitdiff
229 self._parser = self._parse_gitdiff
230 else:
230 else:
231 self.differ = self._highlight_line_udiff
231 self.differ = self._highlight_line_udiff
232 self._parser = self._new_parse_gitdiff
232 self._parser = self._new_parse_gitdiff
233
233
234 def _copy_iterator(self):
234 def _copy_iterator(self):
235 """
235 """
236 make a fresh copy of generator, we should not iterate thru
236 make a fresh copy of generator, we should not iterate thru
237 an original as it's needed for repeating operations on
237 an original as it's needed for repeating operations on
238 this instance of DiffProcessor
238 this instance of DiffProcessor
239 """
239 """
240 self.__udiff, iterator_copy = tee(self.__udiff)
240 self.__udiff, iterator_copy = tee(self.__udiff)
241 return iterator_copy
241 return iterator_copy
242
242
243 def _escaper(self, string):
243 def _escaper(self, string):
244 """
244 """
245 Escaper for diffs: escapes special HTML chars and checks the diff limit
245 Escaper for diffs: escapes special HTML chars and checks the diff limit
246
246
247 :param string: the diff line to escape and count against the limit
247 :param string: the diff line to escape and count against the limit
248 """
248 """
249
249
250 self.cur_diff_size += len(string)
250 self.cur_diff_size += len(string)
251
251
252 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
252 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
253 raise DiffLimitExceeded('Diff Limit Exceeded')
253 raise DiffLimitExceeded('Diff Limit Exceeded')
254
254
255 return safe_unicode(string)\
255 return safe_unicode(string)\
256 .replace('&', '&amp;')\
256 .replace('&', '&amp;')\
257 .replace('<', '&lt;')\
257 .replace('<', '&lt;')\
258 .replace('>', '&gt;')
258 .replace('>', '&gt;')
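# For example, _escaper('a <b>\n') returns u'a &lt;b&gt;\n' and adds the
# raw length to self.cur_diff_size; crossing diff_limit raises
# DiffLimitExceeded when show_full_diff is false.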
259
259
260 def _line_counter(self, l):
260 def _line_counter(self, l):
261 """
261 """
262 Checks each line and bumps total adds/removes for this diff
262 Checks each line and bumps total adds/removes for this diff
263
263
264 :param l: the diff line to inspect
264 :param l: the diff line to inspect
265 """
265 """
266 if l.startswith('+') and not l.startswith('+++'):
266 if l.startswith('+') and not l.startswith('+++'):
267 self.adds += 1
267 self.adds += 1
268 elif l.startswith('-') and not l.startswith('---'):
268 elif l.startswith('-') and not l.startswith('---'):
269 self.removes += 1
269 self.removes += 1
270 return safe_unicode(l)
270 return safe_unicode(l)
271
271
272 def _highlight_line_difflib(self, line, next_):
272 def _highlight_line_difflib(self, line, next_):
273 """
273 """
274 Highlight inline changes in both lines.
274 Highlight inline changes in both lines.
275 """
275 """
276
276
277 if line['action'] == Action.DELETE:
277 if line['action'] == Action.DELETE:
278 old, new = line, next_
278 old, new = line, next_
279 else:
279 else:
280 old, new = next_, line
280 old, new = next_, line
281
281
282 oldwords = self._token_re.split(old['line'])
282 oldwords = self._token_re.split(old['line'])
283 newwords = self._token_re.split(new['line'])
283 newwords = self._token_re.split(new['line'])
284 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
284 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
285
285
286 oldfragments, newfragments = [], []
286 oldfragments, newfragments = [], []
287 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
287 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
288 oldfrag = ''.join(oldwords[i1:i2])
288 oldfrag = ''.join(oldwords[i1:i2])
289 newfrag = ''.join(newwords[j1:j2])
289 newfrag = ''.join(newwords[j1:j2])
290 if tag != 'equal':
290 if tag != 'equal':
291 if oldfrag:
291 if oldfrag:
292 oldfrag = '<del>%s</del>' % oldfrag
292 oldfrag = '<del>%s</del>' % oldfrag
293 if newfrag:
293 if newfrag:
294 newfrag = '<ins>%s</ins>' % newfrag
294 newfrag = '<ins>%s</ins>' % newfrag
295 oldfragments.append(oldfrag)
295 oldfragments.append(oldfrag)
296 newfragments.append(newfrag)
296 newfragments.append(newfrag)
297
297
298 old['line'] = "".join(oldfragments)
298 old['line'] = "".join(oldfragments)
299 new['line'] = "".join(newfragments)
299 new['line'] = "".join(newfragments)
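# Illustrative result: for old 'foo bar' vs new 'foo baz' the token-level
# SequenceMatcher wraps only the differing fragment, leaving
# old['line'] == 'foo <del>bar</del>' and new['line'] == 'foo <ins>baz</ins>'.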
300
300
301 def _highlight_line_udiff(self, line, next_):
301 def _highlight_line_udiff(self, line, next_):
302 """
302 """
303 Highlight inline changes in both lines.
303 Highlight inline changes in both lines.
304 """
304 """
305 start = 0
305 start = 0
306 limit = min(len(line['line']), len(next_['line']))
306 limit = min(len(line['line']), len(next_['line']))
307 while start < limit and line['line'][start] == next_['line'][start]:
307 while start < limit and line['line'][start] == next_['line'][start]:
308 start += 1
308 start += 1
309 end = -1
309 end = -1
310 limit -= start
310 limit -= start
311 while -end <= limit and line['line'][end] == next_['line'][end]:
311 while -end <= limit and line['line'][end] == next_['line'][end]:
312 end -= 1
312 end -= 1
313 end += 1
313 end += 1
314 if start or end:
314 if start or end:
315 def do(l):
315 def do(l):
316 last = end + len(l['line'])
316 last = end + len(l['line'])
317 if l['action'] == Action.ADD:
317 if l['action'] == Action.ADD:
318 tag = 'ins'
318 tag = 'ins'
319 else:
319 else:
320 tag = 'del'
320 tag = 'del'
321 l['line'] = '%s<%s>%s</%s>%s' % (
321 l['line'] = '%s<%s>%s</%s>%s' % (
322 l['line'][:start],
322 l['line'][:start],
323 tag,
323 tag,
324 l['line'][start:last],
324 l['line'][start:last],
325 tag,
325 tag,
326 l['line'][last:]
326 l['line'][last:]
327 )
327 )
328 do(line)
328 do(line)
329 do(next_)
329 do(next_)
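# Illustrative result: for 'value = 1' vs 'value = 2' the common prefix
# 'value = ' is skipped, so only the differing tail gets wrapped:
# 'value = <del>1</del>' and 'value = <ins>2</ins>'.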
330
330
331 def _clean_line(self, line, command):
331 def _clean_line(self, line, command):
332 if command in ['+', '-', ' ']:
332 if command in ['+', '-', ' ']:
333 # only modify the line if it's actually a diff thing
333 # only modify the line if it's actually a diff thing
334 line = line[1:]
334 line = line[1:]
335 return line
335 return line
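# For example, _clean_line('+added line', '+') returns 'added line', while
# non-diff lines such as a '@@ ...' header pass through unchanged.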
336
336
337 def _parse_gitdiff(self, inline_diff=True):
337 def _parse_gitdiff(self, inline_diff=True):
338 _files = []
338 _files = []
339 diff_container = lambda arg: arg
339 diff_container = lambda arg: arg
340
340
341 for chunk in self._diff.chunks():
341 for chunk in self._diff.chunks():
342 head = chunk.header
342 head = chunk.header
343
343
344 diff = imap(self._escaper, chunk.diff.splitlines(1))
344 diff = imap(self._escaper, chunk.diff.splitlines(1))
345 raw_diff = chunk.raw
345 raw_diff = chunk.raw
346 limited_diff = False
346 limited_diff = False
347 exceeds_limit = False
347 exceeds_limit = False
348
348
349 op = None
349 op = None
350 stats = {
350 stats = {
351 'added': 0,
351 'added': 0,
352 'deleted': 0,
352 'deleted': 0,
353 'binary': False,
353 'binary': False,
354 'ops': {},
354 'ops': {},
355 }
355 }
356
356
357 if head['deleted_file_mode']:
357 if head['deleted_file_mode']:
358 op = OPS.DEL
358 op = OPS.DEL
359 stats['binary'] = True
359 stats['binary'] = True
360 stats['ops'][DEL_FILENODE] = 'deleted file'
360 stats['ops'][DEL_FILENODE] = 'deleted file'
361
361
362 elif head['new_file_mode']:
362 elif head['new_file_mode']:
363 op = OPS.ADD
363 op = OPS.ADD
364 stats['binary'] = True
364 stats['binary'] = True
365 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
365 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
366 else: # modify operation, can be copy, rename or chmod
366 else: # modify operation, can be copy, rename or chmod
367
367
368 # CHMOD
368 # CHMOD
369 if head['new_mode'] and head['old_mode']:
369 if head['new_mode'] and head['old_mode']:
370 op = OPS.MOD
370 op = OPS.MOD
371 stats['binary'] = True
371 stats['binary'] = True
372 stats['ops'][CHMOD_FILENODE] = (
372 stats['ops'][CHMOD_FILENODE] = (
373 'modified file chmod %s => %s' % (
373 'modified file chmod %s => %s' % (
374 head['old_mode'], head['new_mode']))
374 head['old_mode'], head['new_mode']))
375 # RENAME
375 # RENAME
376 if head['rename_from'] != head['rename_to']:
376 if head['rename_from'] != head['rename_to']:
377 op = OPS.MOD
377 op = OPS.MOD
378 stats['binary'] = True
378 stats['binary'] = True
379 stats['ops'][RENAMED_FILENODE] = (
379 stats['ops'][RENAMED_FILENODE] = (
380 'file renamed from %s to %s' % (
380 'file renamed from %s to %s' % (
381 head['rename_from'], head['rename_to']))
381 head['rename_from'], head['rename_to']))
382 # COPY
382 # COPY
383 if head.get('copy_from') and head.get('copy_to'):
383 if head.get('copy_from') and head.get('copy_to'):
384 op = OPS.MOD
384 op = OPS.MOD
385 stats['binary'] = True
385 stats['binary'] = True
386 stats['ops'][COPIED_FILENODE] = (
386 stats['ops'][COPIED_FILENODE] = (
387 'file copied from %s to %s' % (
387 'file copied from %s to %s' % (
388 head['copy_from'], head['copy_to']))
388 head['copy_from'], head['copy_to']))
389
389
390 # If our newly parsed headers didn't match anything, fall back to
390 # If our newly parsed headers didn't match anything, fall back to
391 # old-style detection
391 # old-style detection
392 if op is None:
392 if op is None:
393 if not head['a_file'] and head['b_file']:
393 if not head['a_file'] and head['b_file']:
394 op = OPS.ADD
394 op = OPS.ADD
395 stats['binary'] = True
395 stats['binary'] = True
396 stats['ops'][NEW_FILENODE] = 'new file'
396 stats['ops'][NEW_FILENODE] = 'new file'
397
397
398 elif head['a_file'] and not head['b_file']:
398 elif head['a_file'] and not head['b_file']:
399 op = OPS.DEL
399 op = OPS.DEL
400 stats['binary'] = True
400 stats['binary'] = True
401 stats['ops'][DEL_FILENODE] = 'deleted file'
401 stats['ops'][DEL_FILENODE] = 'deleted file'
402
402
403 # it's neither ADD nor DELETE
403 # it's neither ADD nor DELETE
404 if op is None:
404 if op is None:
405 op = OPS.MOD
405 op = OPS.MOD
406 stats['binary'] = True
406 stats['binary'] = True
407 stats['ops'][MOD_FILENODE] = 'modified file'
407 stats['ops'][MOD_FILENODE] = 'modified file'
408
408
409 # a real non-binary diff
409 # a real non-binary diff
410 if head['a_file'] or head['b_file']:
410 if head['a_file'] or head['b_file']:
411 try:
411 try:
412 raw_diff, chunks, _stats = self._parse_lines(diff)
412 raw_diff, chunks, _stats = self._parse_lines(diff)
413 stats['binary'] = False
413 stats['binary'] = False
414 stats['added'] = _stats[0]
414 stats['added'] = _stats[0]
415 stats['deleted'] = _stats[1]
415 stats['deleted'] = _stats[1]
416 # explicit mark that it's a modified file
416 # explicit mark that it's a modified file
417 if op == OPS.MOD:
417 if op == OPS.MOD:
418 stats['ops'][MOD_FILENODE] = 'modified file'
418 stats['ops'][MOD_FILENODE] = 'modified file'
419 exceeds_limit = len(raw_diff) > self.file_limit
419 exceeds_limit = len(raw_diff) > self.file_limit
420
420
421 # moved here from the _escaper function so we validate the size of
421 # moved here from the _escaper function so we validate the size of
422 # each file instead of the whole diff; the diff will hide big files
422 # each file instead of the whole diff; the diff will hide big files
423 # but still show small ones
423 # but still show small ones
424 # from my tests big files are fairly safe to parse, but the browser
424 # from my tests big files are fairly safe to parse, but the browser
425 # is the bottleneck when rendering them
425 # is the bottleneck when rendering them
426 if not self.show_full_diff and exceeds_limit:
426 if not self.show_full_diff and exceeds_limit:
427 raise DiffLimitExceeded('File Limit Exceeded')
427 raise DiffLimitExceeded('File Limit Exceeded')
428
428
429 except DiffLimitExceeded:
429 except DiffLimitExceeded:
430 diff_container = lambda _diff: \
430 diff_container = lambda _diff: \
431 LimitedDiffContainer(
431 LimitedDiffContainer(
432 self.diff_limit, self.cur_diff_size, _diff)
432 self.diff_limit, self.cur_diff_size, _diff)
433
433
434 exceeds_limit = len(raw_diff) > self.file_limit
434 exceeds_limit = len(raw_diff) > self.file_limit
435 limited_diff = True
435 limited_diff = True
436 chunks = []
436 chunks = []
437
437
438 else: # GIT format binary patch, or possibly empty diff
438 else: # GIT format binary patch, or possibly empty diff
439 if head['bin_patch']:
439 if head['bin_patch']:
440 # the operation was already extracted above; we simply mark that
440 # the operation was already extracted above; we simply mark that
441 # it's a binary diff we won't show
441 # it's a binary diff we won't show
442 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
442 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
443 chunks = []
443 chunks = []
444
444
445 if chunks and not self.show_full_diff and op == OPS.DEL:
445 if chunks and not self.show_full_diff and op == OPS.DEL:
446 # if not in full-diff mode, hide the deleted file contents
446 # if not in full-diff mode, hide the deleted file contents
447 # TODO: anderson: if the view is not too big, there is no way
447 # TODO: anderson: if the view is not too big, there is no way
448 # to see the content of the file
448 # to see the content of the file
449 chunks = []
449 chunks = []
450
450
451 chunks.insert(0, [{
451 chunks.insert(0, [{
452 'old_lineno': '',
452 'old_lineno': '',
453 'new_lineno': '',
453 'new_lineno': '',
454 'action': Action.CONTEXT,
454 'action': Action.CONTEXT,
455 'line': msg,
455 'line': msg,
456 } for _op, msg in stats['ops'].iteritems()
456 } for _op, msg in stats['ops'].iteritems()
457 if _op not in [MOD_FILENODE]])
457 if _op not in [MOD_FILENODE]])
458
458
459 _files.append({
459 _files.append({
460 'filename': safe_unicode(head['b_path']),
460 'filename': safe_unicode(head['b_path']),
461 'old_revision': head['a_blob_id'],
461 'old_revision': head['a_blob_id'],
462 'new_revision': head['b_blob_id'],
462 'new_revision': head['b_blob_id'],
463 'chunks': chunks,
463 'chunks': chunks,
464 'raw_diff': safe_unicode(raw_diff),
464 'raw_diff': safe_unicode(raw_diff),
465 'operation': op,
465 'operation': op,
466 'stats': stats,
466 'stats': stats,
467 'exceeds_limit': exceeds_limit,
467 'exceeds_limit': exceeds_limit,
468 'is_limited_diff': limited_diff,
468 'is_limited_diff': limited_diff,
469 })
469 })
470
470
471 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
471 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
472 OPS.DEL: 2}.get(info['operation'])
472 OPS.DEL: 2}.get(info['operation'])
473
473
474 if not inline_diff:
474 if not inline_diff:
475 return diff_container(sorted(_files, key=sorter))
475 return diff_container(sorted(_files, key=sorter))
476
476
477 # highlight inline changes
477 # highlight inline changes
478 for diff_data in _files:
478 for diff_data in _files:
479 for chunk in diff_data['chunks']:
479 for chunk in diff_data['chunks']:
480 lineiter = iter(chunk)
480 lineiter = iter(chunk)
481 try:
481 try:
482 while 1:
482 while 1:
483 line = lineiter.next()
483 line = lineiter.next()
484 if line['action'] not in (
484 if line['action'] not in (
485 Action.UNMODIFIED, Action.CONTEXT):
485 Action.UNMODIFIED, Action.CONTEXT):
486 nextline = lineiter.next()
486 nextline = lineiter.next()
487 if nextline['action'] in ['unmod', 'context'] or \
487 if nextline['action'] in ['unmod', 'context'] or \
488 nextline['action'] == line['action']:
488 nextline['action'] == line['action']:
489 continue
489 continue
490 self.differ(line, nextline)
490 self.differ(line, nextline)
491 except StopIteration:
491 except StopIteration:
492 pass
492 pass
493
493
494 return diff_container(sorted(_files, key=sorter))
494 return diff_container(sorted(_files, key=sorter))
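# Shape of each entry in the returned list (values illustrative):
#   {'filename': u'setup.py', 'old_revision': 'abc...', 'new_revision': 'def...',
#    'chunks': [...], 'raw_diff': u'...', 'operation': OPS.MOD, 'stats': {...},
#    'exceeds_limit': False, 'is_limited_diff': False}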
495
495
496
496
497 # FIXME: NEWDIFFS: dan: this replaces the old _escaper function
497 # FIXME: NEWDIFFS: dan: this replaces the old _escaper function
498 def _process_line(self, string):
498 def _process_line(self, string):
499 """
499 """
500 Process a diff line, checks the diff limit
500 Process a diff line, checks the diff limit
501
501
502 :param string:
502 :param string:
503 """
503 """
504
504
505 self.cur_diff_size += len(string)
505 self.cur_diff_size += len(string)
506
506
507 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
507 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
508 raise DiffLimitExceeded('Diff Limit Exceeded')
508 raise DiffLimitExceeded('Diff Limit Exceeded')
509
509
510 return safe_unicode(string)
510 return safe_unicode(string)
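# Unlike _escaper above, this does no HTML escaping; it only tracks the
# cumulative diff size and raises DiffLimitExceeded once diff_limit is
# crossed (escaping presumably happens later in the new-style pipeline).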
511
511
512 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
512 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
513 def _new_parse_gitdiff(self, inline_diff=True):
513 def _new_parse_gitdiff(self, inline_diff=True):
514 _files = []
514 _files = []
515 diff_container = lambda arg: arg
515 diff_container = lambda arg: arg
516 for chunk in self._diff.chunks():
516 for chunk in self._diff.chunks():
517 head = chunk.header
517 head = chunk.header
518 log.debug('parsing diff %r', head)
518 log.debug('parsing diff %r', head)
519
519
520 diff = imap(self._process_line, chunk.diff.splitlines(1))
520 diff = imap(self._process_line, chunk.diff.splitlines(1))
521 raw_diff = chunk.raw
521 raw_diff = chunk.raw
522 limited_diff = False
522 limited_diff = False
523 exceeds_limit = False
523 exceeds_limit = False
524 # if 'empty_file_to_modify_and_rename' in head['a_path']:
524 # if 'empty_file_to_modify_and_rename' in head['a_path']:
525 # 1/0
525 # 1/0
526 op = None
526 op = None
527 stats = {
527 stats = {
528 'added': 0,
528 'added': 0,
529 'deleted': 0,
529 'deleted': 0,
530 'binary': False,
530 'binary': False,
531 'old_mode': None,
531 'old_mode': None,
532 'new_mode': None,
532 'new_mode': None,
533 'ops': {},
533 'ops': {},
534 }
534 }
535 if head['old_mode']:
535 if head['old_mode']:
536 stats['old_mode'] = head['old_mode']
536 stats['old_mode'] = head['old_mode']
537 if head['new_mode']:
537 if head['new_mode']:
538 stats['new_mode'] = head['new_mode']
538 stats['new_mode'] = head['new_mode']
539 if head['b_mode']:
539 if head['b_mode']:
540 stats['new_mode'] = head['b_mode']
540 stats['new_mode'] = head['b_mode']
541
541
542 if head['deleted_file_mode']:
542 if head['deleted_file_mode']:
543 op = OPS.DEL
543 op = OPS.DEL
544 stats['binary'] = True
544 stats['binary'] = True
545 stats['ops'][DEL_FILENODE] = 'deleted file'
545 stats['ops'][DEL_FILENODE] = 'deleted file'
546
546
547 elif head['new_file_mode']:
547 elif head['new_file_mode']:
548 op = OPS.ADD
548 op = OPS.ADD
549 stats['binary'] = True
549 stats['binary'] = True
550 stats['old_mode'] = None
550 stats['old_mode'] = None
551 stats['new_mode'] = head['new_file_mode']
551 stats['new_mode'] = head['new_file_mode']
552 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
552 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
553 else: # modify operation, can be copy, rename or chmod
553 else: # modify operation, can be copy, rename or chmod
554
554
555 # CHMOD
555 # CHMOD
556 if head['new_mode'] and head['old_mode']:
556 if head['new_mode'] and head['old_mode']:
557 op = OPS.MOD
557 op = OPS.MOD
558 stats['binary'] = True
558 stats['binary'] = True
559 stats['ops'][CHMOD_FILENODE] = (
559 stats['ops'][CHMOD_FILENODE] = (
560 'modified file chmod %s => %s' % (
560 'modified file chmod %s => %s' % (
561 head['old_mode'], head['new_mode']))
561 head['old_mode'], head['new_mode']))
562
562
563 # RENAME
563 # RENAME
564 if head['rename_from'] != head['rename_to']:
564 if head['rename_from'] != head['rename_to']:
565 op = OPS.MOD
565 op = OPS.MOD
566 stats['binary'] = True
566 stats['binary'] = True
567 stats['renamed'] = (head['rename_from'], head['rename_to'])
567 stats['renamed'] = (head['rename_from'], head['rename_to'])
568 stats['ops'][RENAMED_FILENODE] = (
568 stats['ops'][RENAMED_FILENODE] = (
569 'file renamed from %s to %s' % (
569 'file renamed from %s to %s' % (
570 head['rename_from'], head['rename_to']))
570 head['rename_from'], head['rename_to']))
571 # COPY
571 # COPY
572 if head.get('copy_from') and head.get('copy_to'):
572 if head.get('copy_from') and head.get('copy_to'):
573 op = OPS.MOD
573 op = OPS.MOD
574 stats['binary'] = True
574 stats['binary'] = True
575 stats['copied'] = (head['copy_from'], head['copy_to'])
575 stats['copied'] = (head['copy_from'], head['copy_to'])
576 stats['ops'][COPIED_FILENODE] = (
576 stats['ops'][COPIED_FILENODE] = (
577 'file copied from %s to %s' % (
577 'file copied from %s to %s' % (
578 head['copy_from'], head['copy_to']))
578 head['copy_from'], head['copy_to']))
579
579
580 # If our newly parsed headers didn't match anything, fall back to
580 # If our newly parsed headers didn't match anything, fall back to
581 # old-style detection
581 # old-style detection
582 if op is None:
582 if op is None:
583 if not head['a_file'] and head['b_file']:
583 if not head['a_file'] and head['b_file']:
584 op = OPS.ADD
584 op = OPS.ADD
585 stats['binary'] = True
585 stats['binary'] = True
586 stats['new_file'] = True
586 stats['new_file'] = True
587 stats['ops'][NEW_FILENODE] = 'new file'
587 stats['ops'][NEW_FILENODE] = 'new file'
588
588
589 elif head['a_file'] and not head['b_file']:
589 elif head['a_file'] and not head['b_file']:
590 op = OPS.DEL
590 op = OPS.DEL
591 stats['binary'] = True
591 stats['binary'] = True
592 stats['ops'][DEL_FILENODE] = 'deleted file'
592 stats['ops'][DEL_FILENODE] = 'deleted file'
593
593
594 # it's neither ADD nor DELETE
594 # it's neither ADD nor DELETE
595 if op is None:
595 if op is None:
596 op = OPS.MOD
596 op = OPS.MOD
597 stats['binary'] = True
597 stats['binary'] = True
598 stats['ops'][MOD_FILENODE] = 'modified file'
598 stats['ops'][MOD_FILENODE] = 'modified file'
599
599
600 # a real non-binary diff
600 # a real non-binary diff
601 if head['a_file'] or head['b_file']:
601 if head['a_file'] or head['b_file']:
602 try:
602 try:
603 raw_diff, chunks, _stats = self._new_parse_lines(diff)
603 raw_diff, chunks, _stats = self._new_parse_lines(diff)
604 stats['binary'] = False
604 stats['binary'] = False
605 stats['added'] = _stats[0]
605 stats['added'] = _stats[0]
606 stats['deleted'] = _stats[1]
606 stats['deleted'] = _stats[1]
607 # explicit mark that it's a modified file
607 # explicit mark that it's a modified file
608 if op == OPS.MOD:
608 if op == OPS.MOD:
609 stats['ops'][MOD_FILENODE] = 'modified file'
609 stats['ops'][MOD_FILENODE] = 'modified file'
610 exceeds_limit = len(raw_diff) > self.file_limit
610 exceeds_limit = len(raw_diff) > self.file_limit
611
611
612 # moved here from the _escaper function so we validate the size of
612 # moved here from the _escaper function so we validate the size of
613 # each file instead of the whole diff; the diff will hide big files
613 # each file instead of the whole diff; the diff will hide big files
614 # but still show small ones
614 # but still show small ones
615 # from my tests big files are fairly safe to parse, but the browser
615 # from my tests big files are fairly safe to parse, but the browser
616 # is the bottleneck when rendering them
616 # is the bottleneck when rendering them
617 if not self.show_full_diff and exceeds_limit:
617 if not self.show_full_diff and exceeds_limit:
618 raise DiffLimitExceeded('File Limit Exceeded')
618 raise DiffLimitExceeded('File Limit Exceeded')
619
619
620 except DiffLimitExceeded:
620 except DiffLimitExceeded:
621 diff_container = lambda _diff: \
621 diff_container = lambda _diff: \
622 LimitedDiffContainer(
622 LimitedDiffContainer(
623 self.diff_limit, self.cur_diff_size, _diff)
623 self.diff_limit, self.cur_diff_size, _diff)
624
624
625 exceeds_limit = len(raw_diff) > self.file_limit
625 exceeds_limit = len(raw_diff) > self.file_limit
626 limited_diff = True
626 limited_diff = True
627 chunks = []
627 chunks = []
628
628
629 else: # GIT format binary patch, or possibly empty diff
629 else: # GIT format binary patch, or possibly empty diff
630 if head['bin_patch']:
630 if head['bin_patch']:
631 # the operation was already extracted above; we simply mark that
631 # the operation was already extracted above; we simply mark that
632 # it's a binary diff we won't show
632 # it's a binary diff we won't show
633 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
633 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
634 chunks = []
634 chunks = []
635
635
636 if chunks and not self.show_full_diff and op == OPS.DEL:
636 if chunks and not self.show_full_diff and op == OPS.DEL:
637 # if not in full-diff mode, hide the deleted file contents
637 # if not in full-diff mode, hide the deleted file contents
638 # TODO: anderson: if the view is not too big, there is no way
638 # TODO: anderson: if the view is not too big, there is no way
639 # to see the content of the file
639 # to see the content of the file
640 chunks = []
640 chunks = []
641
641
642 chunks.insert(0, [{
642 chunks.insert(0, [{
643 'old_lineno': '',
643 'old_lineno': '',
644 'new_lineno': '',
644 'new_lineno': '',
645 'action': Action.CONTEXT,
645 'action': Action.CONTEXT,
646 'line': msg,
646 'line': msg,
647 } for _op, msg in stats['ops'].iteritems()
647 } for _op, msg in stats['ops'].iteritems()
648 if _op not in [MOD_FILENODE]])
648 if _op not in [MOD_FILENODE]])
649
649
650 original_filename = safe_unicode(head['a_path'])
650 original_filename = safe_unicode(head['a_path'])
651 _files.append({
651 _files.append({
652 'original_filename': original_filename,
652 'original_filename': original_filename,
653 'filename': safe_unicode(head['b_path']),
653 'filename': safe_unicode(head['b_path']),
654 'old_revision': head['a_blob_id'],
654 'old_revision': head['a_blob_id'],
655 'new_revision': head['b_blob_id'],
655 'new_revision': head['b_blob_id'],
656 'chunks': chunks,
656 'chunks': chunks,
657 'raw_diff': safe_unicode(raw_diff),
657 'raw_diff': safe_unicode(raw_diff),
658 'operation': op,
658 'operation': op,
659 'stats': stats,
659 'stats': stats,
660 'exceeds_limit': exceeds_limit,
660 'exceeds_limit': exceeds_limit,
661 'is_limited_diff': limited_diff,
661 'is_limited_diff': limited_diff,
662 })
662 })
663
663
664
664
665 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
665 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
666 OPS.DEL: 2}.get(info['operation'])
666 OPS.DEL: 2}.get(info['operation'])
667
667
668 return diff_container(sorted(_files, key=sorter))
668 return diff_container(sorted(_files, key=sorter))
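# Same return shape as _parse_gitdiff above, plus an 'original_filename'
# key (the pre-rename a_path) on each file entry.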
669
669
670 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
670 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
671 def _parse_lines(self, diff):
671 def _parse_lines(self, diff):
672 """
672 """
673 Parse the diff and return data for the template.
673 Parse the diff and return data for the template.
674 """
674 """
675
675
676 lineiter = iter(diff)
676 lineiter = iter(diff)
677 stats = [0, 0]
677 stats = [0, 0]
678 chunks = []
678 chunks = []
679 raw_diff = []
679 raw_diff = []
680
680
681 try:
681 try:
682 line = lineiter.next()
682 line = lineiter.next()
683
683
684 while line:
684 while line:
685 raw_diff.append(line)
685 raw_diff.append(line)
686 lines = []
686 lines = []
687 chunks.append(lines)
687 chunks.append(lines)
688
688
689 match = self._chunk_re.match(line)
689 match = self._chunk_re.match(line)
690
690
691 if not match:
691 if not match:
692 break
692 break
693
693
694 gr = match.groups()
694 gr = match.groups()
695 (old_line, old_end,
695 (old_line, old_end,
696 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
696 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
697 old_line -= 1
697 old_line -= 1
698 new_line -= 1
698 new_line -= 1
699
699
700 context = len(gr) == 5
700 context = len(gr) == 5
701 old_end += old_line
701 old_end += old_line
702 new_end += new_line
702 new_end += new_line
703
703
704 if context:
704 if context:
705 # skip the context row only if the hunk starts at the first line
705 # skip the context row only if the hunk starts at the first line
706 if int(gr[0]) > 1:
706 if int(gr[0]) > 1:
707 lines.append({
707 lines.append({
708 'old_lineno': '...',
708 'old_lineno': '...',
709 'new_lineno': '...',
709 'new_lineno': '...',
710 'action': Action.CONTEXT,
710 'action': Action.CONTEXT,
711 'line': line,
711 'line': line,
712 })
712 })
713
713
714 line = lineiter.next()
714 line = lineiter.next()
715
715
716 while old_line < old_end or new_line < new_end:
716 while old_line < old_end or new_line < new_end:
717 command = ' '
717 command = ' '
718 if line:
718 if line:
719 command = line[0]
719 command = line[0]
720
720
721 affects_old = affects_new = False
721 affects_old = affects_new = False
722
722
723 # ignore those if we don't expect them
723 # ignore those if we don't expect them
724 if command in '#@':
724 if command in '#@':
725 continue
725 continue
726 elif command == '+':
726 elif command == '+':
727 affects_new = True
727 affects_new = True
728 action = Action.ADD
728 action = Action.ADD
729 stats[0] += 1
729 stats[0] += 1
730 elif command == '-':
730 elif command == '-':
731 affects_old = True
731 affects_old = True
732 action = Action.DELETE
732 action = Action.DELETE
733 stats[1] += 1
733 stats[1] += 1
734 else:
734 else:
735 affects_old = affects_new = True
735 affects_old = affects_new = True
736 action = Action.UNMODIFIED
736 action = Action.UNMODIFIED
737
737
738 if not self._newline_marker.match(line):
738 if not self._newline_marker.match(line):
739 old_line += affects_old
739 old_line += affects_old
740 new_line += affects_new
740 new_line += affects_new
741 lines.append({
741 lines.append({
742 'old_lineno': affects_old and old_line or '',
742 'old_lineno': affects_old and old_line or '',
743 'new_lineno': affects_new and new_line or '',
743 'new_lineno': affects_new and new_line or '',
744 'action': action,
744 'action': action,
745 'line': self._clean_line(line, command)
745 'line': self._clean_line(line, command)
746 })
746 })
747 raw_diff.append(line)
747 raw_diff.append(line)
748
748
749 line = lineiter.next()
749 line = lineiter.next()
750
750
751 if self._newline_marker.match(line):
751 if self._newline_marker.match(line):
752 # we need to append to lines, since this is not
752 # we need to append to lines, since this is not
753 # counted in the line specs of diff
753 # counted in the line specs of diff
754 lines.append({
754 lines.append({
755 'old_lineno': '...',
755 'old_lineno': '...',
756 'new_lineno': '...',
756 'new_lineno': '...',
757 'action': Action.CONTEXT,
757 'action': Action.CONTEXT,
758 'line': self._clean_line(line, command)
758 'line': self._clean_line(line, command)
759 })
759 })
760
760
761 except StopIteration:
761 except StopIteration:
762 pass
762 pass
763 return ''.join(raw_diff), chunks, stats
763 return ''.join(raw_diff), chunks, stats
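# Parse sketch (hypothetical input): for the hunk
#   ['@@ -1,1 +1,1 @@\n', '-old\n', '+new\n']
# this returns stats == [1, 1] and one chunk whose two line dicts carry
# Action.DELETE (old_lineno 1) and Action.ADD (new_lineno 1).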
764
764
765 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
765 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
766 def _new_parse_lines(self, diff):
766 def _new_parse_lines(self, diff):
767 """
767 """
768 Parse the diff and return data for the template.
768 Parse the diff and return data for the template.
769 """
769 """
770
770
771 lineiter = iter(diff)
771 lineiter = iter(diff)
772 stats = [0, 0]
772 stats = [0, 0]
773 chunks = []
773 chunks = []
774 raw_diff = []
774 raw_diff = []
775
775
776 try:
776 try:
777 line = lineiter.next()
777 line = lineiter.next()
778
778
779 while line:
779 while line:
780 raw_diff.append(line)
780 raw_diff.append(line)
781 match = self._chunk_re.match(line)
781 match = self._chunk_re.match(line)
782
782
783 if not match:
783 if not match:
784 break
784 break
785
785
786 gr = match.groups()
786 gr = match.groups()
787 (old_line, old_end,
787 (old_line, old_end,
788 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
788 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
789
789
790 lines = []
790 lines = []
791 hunk = {
791 hunk = {
792 'section_header': gr[-1],
792 'section_header': gr[-1],
793 'source_start': old_line,
793 'source_start': old_line,
794 'source_length': old_end,
794 'source_length': old_end,
795 'target_start': new_line,
795 'target_start': new_line,
796 'target_length': new_end,
796 'target_length': new_end,
797 'lines': lines,
797 'lines': lines,
798 }
798 }
799 chunks.append(hunk)
799 chunks.append(hunk)
800
800
801 old_line -= 1
801 old_line -= 1
802 new_line -= 1
802 new_line -= 1
803
803
804 context = len(gr) == 5
804 context = len(gr) == 5
805 old_end += old_line
805 old_end += old_line
806 new_end += new_line
806 new_end += new_line
807
807
808 line = lineiter.next()
808 line = lineiter.next()
809
809
810 while old_line < old_end or new_line < new_end:
810 while old_line < old_end or new_line < new_end:
811 command = ' '
811 command = ' '
812 if line:
812 if line:
813 command = line[0]
813 command = line[0]
814
814
815 affects_old = affects_new = False
815 affects_old = affects_new = False
816
816
817 # ignore those if we don't expect them
817 # ignore those if we don't expect them
818 if command in '#@':
818 if command in '#@':
819 continue
819 continue
820 elif command == '+':
820 elif command == '+':
821 affects_new = True
821 affects_new = True
822 action = Action.ADD
822 action = Action.ADD
823 stats[0] += 1
823 stats[0] += 1
824 elif command == '-':
824 elif command == '-':
825 affects_old = True
825 affects_old = True
826 action = Action.DELETE
826 action = Action.DELETE
827 stats[1] += 1
827 stats[1] += 1
828 else:
828 else:
829 affects_old = affects_new = True
829 affects_old = affects_new = True
830 action = Action.UNMODIFIED
830 action = Action.UNMODIFIED
831
831
832 if not self._newline_marker.match(line):
832 if not self._newline_marker.match(line):
833 old_line += affects_old
833 old_line += affects_old
834 new_line += affects_new
834 new_line += affects_new
835 lines.append({
835 lines.append({
836 'old_lineno': affects_old and old_line or '',
836 'old_lineno': affects_old and old_line or '',
837 'new_lineno': affects_new and new_line or '',
837 'new_lineno': affects_new and new_line or '',
838 'action': action,
838 'action': action,
839 'line': self._clean_line(line, command)
839 'line': self._clean_line(line, command)
840 })
840 })
841 raw_diff.append(line)
841 raw_diff.append(line)
842
842
843 line = lineiter.next()
843 line = lineiter.next()
844
844
845 if self._newline_marker.match(line):
845 if self._newline_marker.match(line):
846 # we need to append to lines, since this is not
846 # we need to append to lines, since this is not
847 # counted in the line specs of diff
847 # counted in the line specs of diff
848 if affects_old:
848 if affects_old:
849 action = Action.CONTEXT_OLD
849 action = Action.OLD_NO_NL
850 elif affects_new:
850 elif affects_new:
851 action = Action.CONTEXT_NEW
851 action = Action.NEW_NO_NL
852 else:
852 else:
853 raise Exception('invalid context for no newline')
853 raise Exception('invalid context for no newline')
854
854
855 lines.append({
855 lines.append({
856 'old_lineno': None,
856 'old_lineno': None,
857 'new_lineno': None,
857 'new_lineno': None,
858 'action': action,
858 'action': action,
859 'line': self._clean_line(line, command)
859 'line': self._clean_line(line, command)
860 })
860 })
861
861
862 except StopIteration:
862 except StopIteration:
863 pass
863 pass
864 return ''.join(raw_diff), chunks, stats
864 return ''.join(raw_diff), chunks, stats
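# Unlike _parse_lines, each chunk here is a hunk dict, e.g. for the header
# '@@ -1,1 +1,1 @@ def f():' (illustrative):
#   {'section_header': ' def f():', 'source_start': 1, 'source_length': 1,
#    'target_start': 1, 'target_length': 1, 'lines': [...]}
# and a trailing '\ No newline at end of file' marker becomes a line dict
# with action Action.OLD_NO_NL or Action.NEW_NO_NL and both linenos None.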
865
865
866 def _safe_id(self, idstring):
866 def _safe_id(self, idstring):
867 """Make a string safe for including in an id attribute.
867 """Make a string safe for including in an id attribute.
868
868
869 The HTML spec says that id attributes 'must begin with
869 The HTML spec says that id attributes 'must begin with
870 a letter ([A-Za-z]) and may be followed by any number
870 a letter ([A-Za-z]) and may be followed by any number
871 of letters, digits ([0-9]), hyphens ("-"), underscores
871 of letters, digits ([0-9]), hyphens ("-"), underscores
872 ("_"), colons (":"), and periods (".")'. These regexps
872 ("_"), colons (":"), and periods (".")'. These regexps
873 are slightly over-zealous, in that they remove colons
873 are slightly over-zealous, in that they remove colons
874 and periods unnecessarily.
874 and periods unnecessarily.
875
875
876 Whitespace is transformed into underscores, and then
876 Whitespace is transformed into underscores, and then
877 anything which is not a hyphen or a character that
877 anything which is not a hyphen or a character that
878 matches \w (alphanumerics and underscore) is removed.
878 matches \w (alphanumerics and underscore) is removed.
879
879
880 """
880 """
881 # Transform all whitespace to underscore
881 # Transform all whitespace to underscore
882 idstring = re.sub(r'\s', "_", '%s' % idstring)
882 idstring = re.sub(r'\s', "_", '%s' % idstring)
883 # Remove everything that is not a hyphen or a member of \w
883 # Remove everything that is not a hyphen or a member of \w
884 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
884 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
885 return idstring
885 return idstring
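# For example, _safe_id('My File.txt') -> 'my_filetxt': whitespace becomes
# '_', the '.' is dropped, and the result is lower-cased.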
886
886
887 def prepare(self, inline_diff=True):
887 def prepare(self, inline_diff=True):
888 """
888 """
889 Prepare the passed udiff for HTML rendering.
889 Prepare the passed udiff for HTML rendering.
890
890
891 :return: A list of dicts with diff information.
891 :return: A list of dicts with diff information.
892 """
892 """
893 parsed = self._parser(inline_diff=inline_diff)
893 parsed = self._parser(inline_diff=inline_diff)
894 self.parsed = True
894 self.parsed = True
895 self.parsed_diff = parsed
895 self.parsed_diff = parsed
896 return parsed
896 return parsed
897
897
898 def as_raw(self, diff_lines=None):
898 def as_raw(self, diff_lines=None):
899 """
899 """
900 Returns raw diff as a byte string
900 Returns raw diff as a byte string
901 """
901 """
902 return self._diff.raw
902 return self._diff.raw
903
903
904 def as_html(self, table_class='code-difftable', line_class='line',
904 def as_html(self, table_class='code-difftable', line_class='line',
905 old_lineno_class='lineno old', new_lineno_class='lineno new',
905 old_lineno_class='lineno old', new_lineno_class='lineno new',
906 code_class='code', enable_comments=False, parsed_lines=None):
906 code_class='code', enable_comments=False, parsed_lines=None):
907 """
907 """
908 Return the given diff as an HTML table with customized CSS classes
908 Return the given diff as an HTML table with customized CSS classes
909 """
909 """
910 def _link_to_if(condition, label, url):
910 def _link_to_if(condition, label, url):
911 """
911 """
912 Generates a link if the condition is met, or just the label if not.
912 Generates a link if the condition is met, or just the label if not.
913 """
913 """
914
914
915 if condition:
915 if condition:
916 return '''<a href="%(url)s" class="tooltip"
916 return '''<a href="%(url)s" class="tooltip"
917 title="%(title)s">%(label)s</a>''' % {
917 title="%(title)s">%(label)s</a>''' % {
918 'title': _('Click to select line'),
918 'title': _('Click to select line'),
919 'url': url,
919 'url': url,
920 'label': label
920 'label': label
921 }
921 }
922 else:
922 else:
923 return label
923 return label
924 if not self.parsed:
924 if not self.parsed:
925 self.prepare()
925 self.prepare()
926
926
927 diff_lines = self.parsed_diff
927 diff_lines = self.parsed_diff
928 if parsed_lines:
928 if parsed_lines:
929 diff_lines = parsed_lines
929 diff_lines = parsed_lines
930
930
931 _html_empty = True
931 _html_empty = True
932 _html = []
932 _html = []
933 _html.append('''<table class="%(table_class)s">\n''' % {
933 _html.append('''<table class="%(table_class)s">\n''' % {
934 'table_class': table_class
934 'table_class': table_class
935 })
935 })
936
936
937 for diff in diff_lines:
937 for diff in diff_lines:
938 for line in diff['chunks']:
938 for line in diff['chunks']:
939 _html_empty = False
939 _html_empty = False
940 for change in line:
940 for change in line:
941 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
941 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
942 'lc': line_class,
942 'lc': line_class,
943 'action': change['action']
943 'action': change['action']
944 })
944 })
945 anchor_old_id = ''
945 anchor_old_id = ''
946 anchor_new_id = ''
946 anchor_new_id = ''
947 anchor_old = "%(filename)s_o%(oldline_no)s" % {
947 anchor_old = "%(filename)s_o%(oldline_no)s" % {
948 'filename': self._safe_id(diff['filename']),
948 'filename': self._safe_id(diff['filename']),
949 'oldline_no': change['old_lineno']
949 'oldline_no': change['old_lineno']
950 }
950 }
951 anchor_new = "%(filename)s_n%(newline_no)s" % {
951 anchor_new = "%(filename)s_n%(newline_no)s" % {
952 'filename': self._safe_id(diff['filename']),
952 'filename': self._safe_id(diff['filename']),
953 'newline_no': change['new_lineno']
953 'newline_no': change['new_lineno']
954 }
954 }
955 cond_old = (change['old_lineno'] != '...' and
955 cond_old = (change['old_lineno'] != '...' and
956 change['old_lineno'])
956 change['old_lineno'])
957 cond_new = (change['new_lineno'] != '...' and
957 cond_new = (change['new_lineno'] != '...' and
958 change['new_lineno'])
958 change['new_lineno'])
959 if cond_old:
959 if cond_old:
960 anchor_old_id = 'id="%s"' % anchor_old
960 anchor_old_id = 'id="%s"' % anchor_old
961 if cond_new:
961 if cond_new:
962 anchor_new_id = 'id="%s"' % anchor_new
962 anchor_new_id = 'id="%s"' % anchor_new
963
963
964 if change['action'] != Action.CONTEXT:
964 if change['action'] != Action.CONTEXT:
965 anchor_link = True
965 anchor_link = True
966 else:
966 else:
967 anchor_link = False
967 anchor_link = False
968
968
969 ###########################################################
969 ###########################################################
970 # COMMENT ICONS
970 # COMMENT ICONS
971 ###########################################################
971 ###########################################################
972 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
972 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
973
973
974 if enable_comments and change['action'] != Action.CONTEXT:
974 if enable_comments and change['action'] != Action.CONTEXT:
975 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
975 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
976
976
977 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
977 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
978
978
979 ###########################################################
979 ###########################################################
980 # OLD LINE NUMBER
980 # OLD LINE NUMBER
981 ###########################################################
981 ###########################################################
982 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
982 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
983 'a_id': anchor_old_id,
983 'a_id': anchor_old_id,
984 'olc': old_lineno_class
984 'olc': old_lineno_class
985 })
985 })
986
986
987 _html.append('''%(link)s''' % {
987 _html.append('''%(link)s''' % {
988 'link': _link_to_if(anchor_link, change['old_lineno'],
988 'link': _link_to_if(anchor_link, change['old_lineno'],
989 '#%s' % anchor_old)
989 '#%s' % anchor_old)
990 })
990 })
991 _html.append('''</td>\n''')
991 _html.append('''</td>\n''')
992 ###########################################################
992 ###########################################################
993 # NEW LINE NUMBER
993 # NEW LINE NUMBER
994 ###########################################################
994 ###########################################################
995
995
996 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
996 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
997 'a_id': anchor_new_id,
997 'a_id': anchor_new_id,
998 'nlc': new_lineno_class
998 'nlc': new_lineno_class
999 })
999 })
1000
1000
1001 _html.append('''%(link)s''' % {
1001 _html.append('''%(link)s''' % {
1002 'link': _link_to_if(anchor_link, change['new_lineno'],
1002 'link': _link_to_if(anchor_link, change['new_lineno'],
1003 '#%s' % anchor_new)
1003 '#%s' % anchor_new)
1004 })
1004 })
1005 _html.append('''</td>\n''')
1005 _html.append('''</td>\n''')
1006 ###########################################################
1006 ###########################################################
1007 # CODE
1007 # CODE
1008 ###########################################################
1008 ###########################################################
1009 code_classes = [code_class]
1009 code_classes = [code_class]
1010 if (not enable_comments or
1010 if (not enable_comments or
1011 change['action'] == Action.CONTEXT):
1011 change['action'] == Action.CONTEXT):
1012 code_classes.append('no-comment')
1012 code_classes.append('no-comment')
1013 _html.append('\t<td class="%s">' % ' '.join(code_classes))
1013 _html.append('\t<td class="%s">' % ' '.join(code_classes))
1014 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
1014 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
1015 'code': change['line']
1015 'code': change['line']
1016 })
1016 })
1017
1017
1018 _html.append('''\t</td>''')
1018 _html.append('''\t</td>''')
1019 _html.append('''\n</tr>\n''')
1019 _html.append('''\n</tr>\n''')
1020 _html.append('''</table>''')
1020 _html.append('''</table>''')
1021 if _html_empty:
1021 if _html_empty:
1022 return None
1022 return None
1023 return ''.join(_html)
1023 return ''.join(_html)
1024
1024
1025 def stat(self):
1025 def stat(self):
1026 """
1026 """
1027 Returns a tuple of (added, removed) line counts for this instance
1027 Returns a tuple of (added, removed) line counts for this instance
1028 """
1028 """
1029 return self.adds, self.removes
1029 return self.adds, self.removes
1030
1030
1031 def get_context_of_line(
1031 def get_context_of_line(
1032 self, path, diff_line=None, context_before=3, context_after=3):
1032 self, path, diff_line=None, context_before=3, context_after=3):
1033 """
1033 """
1034 Returns the context lines for the specified diff line.
1034 Returns the context lines for the specified diff line.
1035
1035
1036 :type diff_line: :class:`DiffLineNumber`
1036 :type diff_line: :class:`DiffLineNumber`
1037 """
1037 """
1038 assert self.parsed, "DiffProcessor is not initialized."
1038 assert self.parsed, "DiffProcessor is not initialized."
1039
1039
1040 if None not in diff_line:
1040 if None not in diff_line:
1041 raise ValueError(
1041 raise ValueError(
1042 "Cannot specify both line numbers: {}".format(diff_line))
1042 "Cannot specify both line numbers: {}".format(diff_line))
1043
1043
1044 file_diff = self._get_file_diff(path)
1044 file_diff = self._get_file_diff(path)
1045 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1045 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1046
1046
1047 first_line_to_include = max(idx - context_before, 0)
1047 first_line_to_include = max(idx - context_before, 0)
1048 first_line_after_context = idx + context_after + 1
1048 first_line_after_context = idx + context_after + 1
1049 context_lines = chunk[first_line_to_include:first_line_after_context]
1049 context_lines = chunk[first_line_to_include:first_line_after_context]
1050
1050
1051 line_contents = [
1051 line_contents = [
1052 _context_line(line) for line in context_lines
1052 _context_line(line) for line in context_lines
1053 if _is_diff_content(line)]
1053 if _is_diff_content(line)]
1054 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1054 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1055 # Once they are fixed, we can drop this line here.
1055 # Once they are fixed, we can drop this line here.
1056 if line_contents:
1056 if line_contents:
1057 line_contents[-1] = (
1057 line_contents[-1] = (
1058 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1058 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1059 return line_contents
1059 return line_contents
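# The result is a list of (action, line) tuples, e.g. (illustrative)
# [(Action.UNMODIFIED, u'foo\n'), (Action.ADD, u'bar\n')]; the last entry
# is normalized to end with a newline (see the fixup above).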
1060
1060
1061 def find_context(self, path, context, offset=0):
1061 def find_context(self, path, context, offset=0):
1062 """
1062 """
1063 Finds the given `context` inside of the diff.
1063 Finds the given `context` inside of the diff.
1064
1064
1065 Use the parameter `offset` to specify which offset the target line has
1065 Use the parameter `offset` to specify which offset the target line has
1066 inside of the given `context`. This way the correct diff line will be
1066 inside of the given `context`. This way the correct diff line will be
1067 returned.
1067 returned.
1068
1068
1069 :param offset: Shall be used to specify the offset of the main line
1069 :param offset: Shall be used to specify the offset of the main line
1070 within the given `context`.
1070 within the given `context`.
1071 """
1071 """
1072 if offset < 0 or offset >= len(context):
1072 if offset < 0 or offset >= len(context):
1073 raise ValueError(
1073 raise ValueError(
1074 "Only positive values up to the length of the context "
1074 "Only positive values up to the length of the context "
1075 "minus one are allowed.")
1075 "minus one are allowed.")
1076
1076
1077 matches = []
1077 matches = []
1078 file_diff = self._get_file_diff(path)
1078 file_diff = self._get_file_diff(path)
1079
1079
1080 for chunk in file_diff['chunks']:
1080 for chunk in file_diff['chunks']:
1081 context_iter = iter(context)
1081 context_iter = iter(context)
1082 for line_idx, line in enumerate(chunk):
1082 for line_idx, line in enumerate(chunk):
1083 try:
1083 try:
1084 if _context_line(line) == context_iter.next():
1084 if _context_line(line) == context_iter.next():
1085 continue
1085 continue
1086 except StopIteration:
1086 except StopIteration:
1087 matches.append((line_idx, chunk))
1087 matches.append((line_idx, chunk))
1088 context_iter = iter(context)
1088 context_iter = iter(context)
1089
1089
1090 # Increment the position and trigger StopIteration
1090 # Increment the position and trigger StopIteration
1091 # if we had a match at the end
1091 # if we had a match at the end
1092 line_idx += 1
1092 line_idx += 1
1093 try:
1093 try:
1094 context_iter.next()
1094 context_iter.next()
1095 except StopIteration:
1095 except StopIteration:
1096 matches.append((line_idx, chunk))
1096 matches.append((line_idx, chunk))
1097
1097
1098 effective_offset = len(context) - offset
1098 effective_offset = len(context) - offset
1099 found_at_diff_lines = [
1099 found_at_diff_lines = [
1100 _line_to_diff_line_number(chunk[idx - effective_offset])
1100 _line_to_diff_line_number(chunk[idx - effective_offset])
1101 for idx, chunk in matches]
1101 for idx, chunk in matches]
1102
1102
1103 return found_at_diff_lines
1103 return found_at_diff_lines
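# Sketch: if context == [c0, c1, c2] matches a chunk starting at position
# p, the match records idx == p + 3, so with offset == 1 the reported
# line is chunk[p + 1], i.e. the line that matched c1.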
1104
1104
1105 def _get_file_diff(self, path):
1105 def _get_file_diff(self, path):
1106 for file_diff in self.parsed_diff:
1106 for file_diff in self.parsed_diff:
1107 if file_diff['filename'] == path:
1107 if file_diff['filename'] == path:
1108 break
1108 break
1109 else:
1109 else:
1110 raise FileNotInDiffException("File {} not in diff".format(path))
1110 raise FileNotInDiffException("File {} not in diff".format(path))
1111 return file_diff
1111 return file_diff
1112
1112
1113 def _find_chunk_line_index(self, file_diff, diff_line):
1113 def _find_chunk_line_index(self, file_diff, diff_line):
1114 for chunk in file_diff['chunks']:
1114 for chunk in file_diff['chunks']:
1115 for idx, line in enumerate(chunk):
1115 for idx, line in enumerate(chunk):
1116 if line['old_lineno'] == diff_line.old:
1116 if line['old_lineno'] == diff_line.old:
1117 return chunk, idx
1117 return chunk, idx
1118 if line['new_lineno'] == diff_line.new:
1118 if line['new_lineno'] == diff_line.new:
1119 return chunk, idx
1119 return chunk, idx
1120 raise LineNotInDiffException(
1120 raise LineNotInDiffException(
1121 "The line {} is not part of the diff.".format(diff_line))
1121 "The line {} is not part of the diff.".format(diff_line))
1122
1122
1123
1123
1124 def _is_diff_content(line):
1124 def _is_diff_content(line):
1125 return line['action'] in (
1125 return line['action'] in (
1126 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1126 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1127
1127
1128
1128
1129 def _context_line(line):
1129 def _context_line(line):
1130 return (line['action'], line['line'])
1130 return (line['action'], line['line'])
1131
1131
1132
1132
1133 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1133 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1134
1134
1135
1135
1136 def _line_to_diff_line_number(line):
1136 def _line_to_diff_line_number(line):
1137 new_line_no = line['new_lineno'] or None
1137 new_line_no = line['new_lineno'] or None
1138 old_line_no = line['old_lineno'] or None
1138 old_line_no = line['old_lineno'] or None
1139 return DiffLineNumber(old=old_line_no, new=new_line_no)
1139 return DiffLineNumber(old=old_line_no, new=new_line_no)
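# For example, a parsed line with old_lineno '' and new_lineno 5 maps to
# DiffLineNumber(old=None, new=5).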
1140
1140
1141
1141
1142 class FileNotInDiffException(Exception):
1142 class FileNotInDiffException(Exception):
1143 """
1143 """
1144 Raised when the context for a missing file is requested.
1144 Raised when the context for a missing file is requested.
1145
1145
1146 If you request the context for a line in a file which is not part of the
1146 If you request the context for a line in a file which is not part of the
1147 given diff, then this exception is raised.
1147 given diff, then this exception is raised.
1148 """
1148 """
1149
1149
1150
1150
1151 class LineNotInDiffException(Exception):
1151 class LineNotInDiffException(Exception):
1152 """
1152 """
1153 Raised when the context for a missing line is requested.
1153 Raised when the context for a missing line is requested.
1154
1154
1155 If you request the context for a line in a file and this line is not
1155 If you request the context for a line in a file and this line is not
1156 part of the given diff, then this exception is raised.
1156 part of the given diff, then this exception is raised.
1157 """
1157 """
1158
1158
1159
1159
1160 class DiffLimitExceeded(Exception):
1160 class DiffLimitExceeded(Exception):
1161 pass
1161 pass