##// END OF EJS Templates
diffs: fixed problem with rendering no newline at the end of file markers....
marcink -
r2380:8531a2ca default
parent child Browse files
Show More
@@ -0,0 +1,10
1 diff --git a/server.properties b/server.properties
2 --- a/server.properties
3 +++ b/server.properties
4 @@ -1,2 +1,3 @@
5 property=value
6 -anotherProperty=value
7 \ No newline at end of file
8 +anotherProperty=value
9 +newProperty=super_important_value
10 \ No newline at end of file No newline at end of file
@@ -1,711 +1,735
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2017 RhodeCode GmbH
3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 import difflib
23 from itertools import groupby
23 from itertools import groupby
24
24
25 from pygments import lex
25 from pygments import lex
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from rhodecode.lib.helpers import (
27 from rhodecode.lib.helpers import (
28 get_lexer_for_filenode, html_escape, get_custom_lexer)
28 get_lexer_for_filenode, html_escape, get_custom_lexer)
29 from rhodecode.lib.utils2 import AttributeDict
29 from rhodecode.lib.utils2 import AttributeDict
30 from rhodecode.lib.vcs.nodes import FileNode
30 from rhodecode.lib.vcs.nodes import FileNode
31 from rhodecode.lib.diff_match_patch import diff_match_patch
31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 from rhodecode.lib.diffs import LimitedDiffContainer
32 from rhodecode.lib.diffs import LimitedDiffContainer
33 from pygments.lexers import get_lexer_by_name
33 from pygments.lexers import get_lexer_by_name
34
34
# Plain-text lexer used as the default source/target lexer when rendering a
# patch; all strip/ensure-newline options are disabled.
plain_text_lexer = get_lexer_by_name(
    'text',
    stripall=False,
    stripnl=False,
    ensurenl=False,
)
37
37
38
38
# Module-scoped logger: a named logger (instead of the root logger) lets this
# module's output be filtered and configured independently.
log = logging.getLogger(__name__)
40
40
41
41
def filenode_as_lines_tokens(filenode, lexer=None):
    """
    Tokenize the content of a file node and return it as a list of lines,
    each line being a list of (token_class, token_text) tuples.

    :param filenode: file node whose ``content`` gets tokenized
    :param lexer: optional pygments lexer; when missing (or falsy) the lexer
        is looked up via ``get_lexer_for_filenode``
    """
    requested_lexer = lexer
    if not lexer:
        lexer = get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, requested_lexer)
    token_stream = tokenize_string(filenode.content, lexer)
    return list(split_token_stream(token_stream, split_string='\n'))
51
51
52
52
def tokenize_string(content, lexer):
    """
    Lex ``content`` with the given pygments lexer and yield
    (token_class, token_text) pairs.

    The strip/ensure-newline options are switched off on the passed lexer
    object itself, so that all original new lines and whitespace are kept.
    """
    for option in ('stripall', 'stripnl', 'ensurenl'):
        setattr(lexer, option, False)

    for ttype, text in lex(content, lexer):
        yield pygment_token_class(ttype), text
64
64
65
65
def split_token_stream(tokens, split_string=u'\n'):
    """
    Split a stream of (token_class, token_text) tuples into lines.

    Every occurrence of ``split_string`` inside a token text ends the current
    line; the separator itself is dropped. A token spanning several lines is
    cut into one token per line, keeping its class. Each yielded item is one
    line: a list of (token_class, token_text) tuples.

    >>> list(split_token_stream([(TEXT, 'some\\ntext'), (TEXT, 'more\\n')]))
    [[(TEXT, 'some')],
     [(TEXT, 'text'), (TEXT, 'more')],
     [(TEXT, '')]]

    Note that text ending with ``split_string`` produces a trailing line
    holding a single empty-text token.

    :param tokens: iterable of (token_class, token_text) tuples
    :param split_string: separator that marks the end of a line
    """
    line_tokens = []
    for token_class, token_text in tokens:
        parts = token_text.split(split_string)
        # every part except the last one was terminated by a separator
        for part in parts[:-1]:
            line_tokens.append((token_class, part))
            yield line_tokens
            line_tokens = []

        # the last part stays open: following tokens may continue this line
        line_tokens.append((token_class, parts[-1]))

    if line_tokens:
        yield line_tokens
87
87
88
88
def filenode_as_annotated_lines_tokens(filenode):
    """
    Yield the tokenized lines of a file node grouped by annotation. Runs of
    consecutive lines sharing the same annotation form one group; lines
    without an annotation are grouped under None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """
    resolved_commits = {}  # commit_id => result of its commit_getter()

    annotation_by_line = {}
    for line_no, commit_id, commit_getter, _content in filenode.annotate:
        # call each commit getter only once per commit id
        if commit_id not in resolved_commits:
            resolved_commits[commit_id] = commit_getter()
        annotation_by_line[line_no] = resolved_commits[commit_id]

    def _annotated_lines():
        numbered_lines = enumerate(filenode_as_lines_tokens(filenode), 1)
        for line_no, tokens in numbered_lines:
            yield annotation_by_line.get(line_no), line_no, tokens

    for annotation, members in groupby(_annotated_lines(), lambda x: x[0]):
        yield (
            annotation,
            [(line_no, tokens) for _, line_no, tokens in members]
        )
138
138
139
139
def render_tokenstream(tokenstream):
    """
    Render a token stream as an html string.

    The stream is first grouped with ``rollup_tokenstream``; every class
    group becomes one <span> (carrying a class attribute when the class is
    non-empty) and every op inside it wraps its html-escaped text in a tag
    named after the op, if the op is non-empty.
    """
    chunks = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
        if token_class:
            span_open = u'<span class="%s">' % token_class
        else:
            span_open = u'<span>'
        chunks.append(span_open)

        for op_tag, token_text in token_ops_texts:
            # TODO: dan: investigate showing hidden characters like space/nl/tab
            escaped_text = html_escape(token_text)
            if op_tag:
                chunks.extend(
                    (u'<%s>' % op_tag, escaped_text, u'</%s>' % op_tag))
            else:
                chunks.append(escaped_text)

        chunks.append(u'</span>')

    return ''.join(chunks)
170
170
171
171
def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
        or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html eg for a token stream ie.

        <span class="A"><ins>he</ins>llo</span>
    vs
        <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])

    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]

    :param tokenstream: any iterable (list, generator, ...) of 2 or 3 tuples
    :returns: list of (token_class, [(token_op, token_text), ...]) tuples
    """
    # normalize per token instead of indexing the first element, so that
    # generators/iterators work as well as lists
    normalized = (
        (token[0], '', token[1]) if len(token) == 2 else token
        for token in tokenstream)

    result = []
    for token_class, class_group in groupby(normalized, lambda t: t[0]):
        # adjacent tokens sharing class and op are merged into one text
        ops = [
            (token_op, ''.join(token[2] for token in op_group))
            for token_op, op_group in groupby(class_group, lambda t: t[1])]
        result.append((token_class, ops))
    return result
225
225
226
226
def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    Tokens are first compared as whole blocks; only blocks that have to be
    replaced are diffed character by character, in which case the result
    holds one tuple per character.

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    :returns: tuple of (old_tokens_result, new_tokens_result, similarity)
        where similarity is the difflib ratio between the joined texts of
        the old and the new line
    """

    old_tokens_result = []
    new_tokens_result = []

    similarity = difflib.SequenceMatcher(
        None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(
        None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            old_char_tokens = [
                (token_class, char)
                for token_class, token_text in old_tokens[o1:o2]
                for char in token_text]
            new_char_tokens = [
                (token_class, char)
                for token_class, token_text in new_tokens[n1:n2]
                for char in token_text]

            old_string = ''.join([char for _, char in old_char_tokens])
            new_string = ''.join([char for _, char in new_char_tokens])

            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                # a/b are the read positions in the old/new characters
                a, b = 0, 0
                for op, rep in reps:
                    rep_len = len(rep)
                    if op == 0:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += rep_len
                        b += rep_len
                    elif op == -1:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += rep_len
                    elif op == 1:
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += rep_len
            else:
                # the difflib character diff is only computed when it is
                # really used, it is wasted work for the dmp variant
                copcodes = difflib.SequenceMatcher(
                    None, old_string, new_string).get_opcodes()
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        old_op, new_op = '', ''
                    else:
                        # 'delete' has an empty new range and 'insert' an
                        # empty old range, so together with 'replace' they
                        # can share the same handling
                        old_op, new_op = 'del', 'ins'
                    for token_class, char in old_char_tokens[co1:co2]:
                        obuffer.append((token_class, old_op, char))
                    for token_class, char in new_char_tokens[cn1:cn2]:
                        nbuffer.append((token_class, new_op, char))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
344
344
345
345
346 class DiffSet(object):
346 class DiffSet(object):
347 """
347 """
348 An object for parsing the diff result from diffs.DiffProcessor and
348 An object for parsing the diff result from diffs.DiffProcessor and
349 adding highlighting, side by side/unified renderings and line diffs
349 adding highlighting, side by side/unified renderings and line diffs
350 """
350 """
351
351
352 HL_REAL = 'REAL' # highlights using original file, slow
352 HL_REAL = 'REAL' # highlights using original file, slow
353 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
353 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
354 # in the case of multiline code
354 # in the case of multiline code
355 HL_NONE = 'NONE' # no highlighting, fastest
355 HL_NONE = 'NONE' # no highlighting, fastest
356
356
357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
358 source_repo_name=None,
358 source_repo_name=None,
359 source_node_getter=lambda filename: None,
359 source_node_getter=lambda filename: None,
360 target_node_getter=lambda filename: None,
360 target_node_getter=lambda filename: None,
361 source_nodes=None, target_nodes=None,
361 source_nodes=None, target_nodes=None,
362 max_file_size_limit=150 * 1024, # files over this size will
362 max_file_size_limit=150 * 1024, # files over this size will
363 # use fast highlighting
363 # use fast highlighting
364 comments=None,
364 comments=None,
365 ):
365 ):
366
366
367 self.highlight_mode = highlight_mode
367 self.highlight_mode = highlight_mode
368 self.highlighted_filenodes = {}
368 self.highlighted_filenodes = {}
369 self.source_node_getter = source_node_getter
369 self.source_node_getter = source_node_getter
370 self.target_node_getter = target_node_getter
370 self.target_node_getter = target_node_getter
371 self.source_nodes = source_nodes or {}
371 self.source_nodes = source_nodes or {}
372 self.target_nodes = target_nodes or {}
372 self.target_nodes = target_nodes or {}
373 self.repo_name = repo_name
373 self.repo_name = repo_name
374 self.source_repo_name = source_repo_name or repo_name
374 self.source_repo_name = source_repo_name or repo_name
375 self.comments = comments or {}
375 self.comments = comments or {}
376 self.comments_store = self.comments.copy()
376 self.comments_store = self.comments.copy()
377 self.max_file_size_limit = max_file_size_limit
377 self.max_file_size_limit = max_file_size_limit
378
378
def render_patchset(self, patchset, source_ref=None, target_ref=None):
    """
    Render every patch of ``patchset`` and collect the results, together
    with summed up statistics, in one diffset object.

    :param patchset: iterable of patch dicts, each one providing at least
        the 'filename' and 'stats' keys
    :param source_ref: stored on the result as ``source_ref``
    :param target_ref: stored on the result as ``target_ref``
    :returns: AttributeDict holding the rendered ``files``, per file
        ``file_stats`` and the added/deleted line counters
    """
    diffset = AttributeDict({
        'lines_added': 0,
        'lines_deleted': 0,
        'changed_files': 0,
        'files': [],
        'file_stats': {},
        'limited_diff': isinstance(patchset, LimitedDiffContainer),
        'repo_name': self.repo_name,
        'source_repo_name': self.source_repo_name,
        'source_ref': source_ref,
        'target_ref': target_ref,
    })

    for patch in patchset:
        stats = patch['stats']
        diffset.file_stats[patch['filename']] = stats

        filediff = self.render_patch(patch)
        filediff.diffset = diffset
        diffset.files.append(filediff)
        diffset.changed_files += 1

        # binary files do not contribute to the line counters
        if stats['binary']:
            continue
        diffset.lines_added += stats['added']
        diffset.lines_deleted += stats['deleted']

    return diffset
403
403
404 _lexer_cache = {}
404 _lexer_cache = {}
405
405
406 def _get_lexer_for_filename(self, filename, filenode=None):
406 def _get_lexer_for_filename(self, filename, filenode=None):
407 # cached because we might need to call it twice for source/target
407 # cached because we might need to call it twice for source/target
408 if filename not in self._lexer_cache:
408 if filename not in self._lexer_cache:
409 if filenode:
409 if filenode:
410 lexer = filenode.lexer
410 lexer = filenode.lexer
411 extension = filenode.extension
411 extension = filenode.extension
412 else:
412 else:
413 lexer = FileNode.get_lexer(filename=filename)
413 lexer = FileNode.get_lexer(filename=filename)
414 extension = filename.split('.')[-1]
414 extension = filename.split('.')[-1]
415
415
416 lexer = get_custom_lexer(extension) or lexer
416 lexer = get_custom_lexer(extension) or lexer
417 self._lexer_cache[filename] = lexer
417 self._lexer_cache[filename] = lexer
418 return self._lexer_cache[filename]
418 return self._lexer_cache[filename]
419
419
420 def render_patch(self, patch):
420 def render_patch(self, patch):
421 log.debug('rendering diff for %r' % patch['filename'])
421 log.debug('rendering diff for %r' % patch['filename'])
422
422
423 source_filename = patch['original_filename']
423 source_filename = patch['original_filename']
424 target_filename = patch['filename']
424 target_filename = patch['filename']
425
425
426 source_lexer = plain_text_lexer
426 source_lexer = plain_text_lexer
427 target_lexer = plain_text_lexer
427 target_lexer = plain_text_lexer
428
428
429 if not patch['stats']['binary']:
429 if not patch['stats']['binary']:
430 if self.highlight_mode == self.HL_REAL:
430 if self.highlight_mode == self.HL_REAL:
431 if (source_filename and patch['operation'] in ('D', 'M')
431 if (source_filename and patch['operation'] in ('D', 'M')
432 and source_filename not in self.source_nodes):
432 and source_filename not in self.source_nodes):
433 self.source_nodes[source_filename] = (
433 self.source_nodes[source_filename] = (
434 self.source_node_getter(source_filename))
434 self.source_node_getter(source_filename))
435
435
436 if (target_filename and patch['operation'] in ('A', 'M')
436 if (target_filename and patch['operation'] in ('A', 'M')
437 and target_filename not in self.target_nodes):
437 and target_filename not in self.target_nodes):
438 self.target_nodes[target_filename] = (
438 self.target_nodes[target_filename] = (
439 self.target_node_getter(target_filename))
439 self.target_node_getter(target_filename))
440
440
441 elif self.highlight_mode == self.HL_FAST:
441 elif self.highlight_mode == self.HL_FAST:
442 source_lexer = self._get_lexer_for_filename(source_filename)
442 source_lexer = self._get_lexer_for_filename(source_filename)
443 target_lexer = self._get_lexer_for_filename(target_filename)
443 target_lexer = self._get_lexer_for_filename(target_filename)
444
444
445 source_file = self.source_nodes.get(source_filename, source_filename)
445 source_file = self.source_nodes.get(source_filename, source_filename)
446 target_file = self.target_nodes.get(target_filename, target_filename)
446 target_file = self.target_nodes.get(target_filename, target_filename)
447
447
448 source_filenode, target_filenode = None, None
448 source_filenode, target_filenode = None, None
449
449
450 # TODO: dan: FileNode.lexer works on the content of the file - which
450 # TODO: dan: FileNode.lexer works on the content of the file - which
451 # can be slow - issue #4289 explains a lexer clean up - which once
451 # can be slow - issue #4289 explains a lexer clean up - which once
452 # done can allow caching a lexer for a filenode to avoid the file lookup
452 # done can allow caching a lexer for a filenode to avoid the file lookup
453 if isinstance(source_file, FileNode):
453 if isinstance(source_file, FileNode):
454 source_filenode = source_file
454 source_filenode = source_file
455 #source_lexer = source_file.lexer
455 #source_lexer = source_file.lexer
456 source_lexer = self._get_lexer_for_filename(source_filename)
456 source_lexer = self._get_lexer_for_filename(source_filename)
457 source_file.lexer = source_lexer
457 source_file.lexer = source_lexer
458
458
459 if isinstance(target_file, FileNode):
459 if isinstance(target_file, FileNode):
460 target_filenode = target_file
460 target_filenode = target_file
461 #target_lexer = target_file.lexer
461 #target_lexer = target_file.lexer
462 target_lexer = self._get_lexer_for_filename(target_filename)
462 target_lexer = self._get_lexer_for_filename(target_filename)
463 target_file.lexer = target_lexer
463 target_file.lexer = target_lexer
464
464
465 source_file_path, target_file_path = None, None
465 source_file_path, target_file_path = None, None
466
466
467 if source_filename != '/dev/null':
467 if source_filename != '/dev/null':
468 source_file_path = source_filename
468 source_file_path = source_filename
469 if target_filename != '/dev/null':
469 if target_filename != '/dev/null':
470 target_file_path = target_filename
470 target_file_path = target_filename
471
471
472 source_file_type = source_lexer.name
472 source_file_type = source_lexer.name
473 target_file_type = target_lexer.name
473 target_file_type = target_lexer.name
474
474
475 filediff = AttributeDict({
475 filediff = AttributeDict({
476 'source_file_path': source_file_path,
476 'source_file_path': source_file_path,
477 'target_file_path': target_file_path,
477 'target_file_path': target_file_path,
478 'source_filenode': source_filenode,
478 'source_filenode': source_filenode,
479 'target_filenode': target_filenode,
479 'target_filenode': target_filenode,
480 'source_file_type': target_file_type,
480 'source_file_type': target_file_type,
481 'target_file_type': source_file_type,
481 'target_file_type': source_file_type,
482 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
482 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
483 'operation': patch['operation'],
483 'operation': patch['operation'],
484 'source_mode': patch['stats']['old_mode'],
484 'source_mode': patch['stats']['old_mode'],
485 'target_mode': patch['stats']['new_mode'],
485 'target_mode': patch['stats']['new_mode'],
486 'limited_diff': isinstance(patch, LimitedDiffContainer),
486 'limited_diff': isinstance(patch, LimitedDiffContainer),
487 'hunks': [],
487 'hunks': [],
488 'diffset': self,
488 'diffset': self,
489 })
489 })
490
490
491 for hunk in patch['chunks'][1:]:
491 for hunk in patch['chunks'][1:]:
492 hunkbit = self.parse_hunk(hunk, source_file, target_file)
492 hunkbit = self.parse_hunk(hunk, source_file, target_file)
493 hunkbit.source_file_path = source_file_path
493 hunkbit.source_file_path = source_file_path
494 hunkbit.target_file_path = target_file_path
494 hunkbit.target_file_path = target_file_path
495 filediff.hunks.append(hunkbit)
495 filediff.hunks.append(hunkbit)
496
496
497 left_comments = {}
497 left_comments = {}
498 if source_file_path in self.comments_store:
498 if source_file_path in self.comments_store:
499 for lineno, comments in self.comments_store[source_file_path].items():
499 for lineno, comments in self.comments_store[source_file_path].items():
500 left_comments[lineno] = comments
500 left_comments[lineno] = comments
501
501
502 if target_file_path in self.comments_store:
502 if target_file_path in self.comments_store:
503 for lineno, comments in self.comments_store[target_file_path].items():
503 for lineno, comments in self.comments_store[target_file_path].items():
504 left_comments[lineno] = comments
504 left_comments[lineno] = comments
505 # left comments are one that we couldn't place in diff lines.
505 # left comments are one that we couldn't place in diff lines.
506 # could be outdated, or the diff changed and this line is no
506 # could be outdated, or the diff changed and this line is no
507 # longer available
507 # longer available
508 filediff.left_comments = left_comments
508 filediff.left_comments = left_comments
509
509
510 return filediff
510 return filediff
511
511
def parse_hunk(self, hunk, source_file, target_file):
    """
    Turn one parsed hunk into an ``AttributeDict`` ready for rendering.

    The hunk header fields are copied over and the hunk lines are grouped
    into rows via ``parse_lines``.  Changed lines are buffered per side and
    flushed whenever an unmodified line is reached, and once more at the
    end of the hunk.

    :param hunk: mapping with ``source_start``, ``source_length``,
        ``target_start``, ``target_length``, ``section_header`` and
        ``lines`` (each line being a mapping with an ``action`` key)
    :param source_file: passed through to ``parse_lines``
    :param target_file: passed through to ``parse_lines``
    :returns: ``AttributeDict`` with the header fields, ``lines``,
        ``unified`` (result of ``as_unified``) and ``sideside`` (the same
        list object as ``lines``)
    """
    header_keys = (
        'source_start', 'source_length',
        'target_start', 'target_length',
        'section_header',
    )
    parsed = AttributeDict(dict((key, hunk[key]) for key in header_keys))
    parsed['lines'] = []

    old_side, new_side = [], []
    goes_to_old = ('del', 'old-no-nl')
    goes_to_new = ('add', 'new-no-nl')

    for entry in hunk['lines']:
        action = entry['action']
        if action == 'unmod':
            # flush the changed lines collected so far, then start the next
            # batch with this context line present on both sides
            parsed.lines.extend(
                self.parse_lines(
                    old_side, new_side, source_file, target_file))
            new_side.append(entry)
            old_side.append(entry)
        elif action in goes_to_new:
            new_side.append(entry)
        elif action in goes_to_old:
            old_side.append(entry)

    # flush whatever is still buffered at the end of the hunk
    parsed.lines.extend(
        self.parse_lines(old_side, new_side, source_file, target_file))
    parsed.unified = self.as_unified(parsed.lines)
    parsed.sideside = parsed.lines

    return parsed
545
545
def parse_lines(self, before_lines, after_lines, source_file, target_file):
    """
    Pair buffered old-side and new-side lines into side-by-side rows.

    Lines are matched positionally (first old line with first new line, and
    so on).  When one side runs out, that side of the row stays an empty
    ``AttributeDict``.  When both sides of a row have tokens they are
    compared with ``tokens_diff`` before rendering.

    A trailing ``old-no-nl`` / ``new-no-nl`` marker does not become a row of
    its own: its rendered text is appended to the content of the last line
    of the same side.

    Both input lists are emptied before returning; ``parse_hunk`` keeps
    using them as its buffers.

    :param before_lines: list of old-side line mappings (``action``,
        ``line``, ``old_lineno``)
    :param after_lines: list of new-side line mappings (``action``,
        ``line``, ``new_lineno``)
    :param source_file: file the old-side lines are tokenized against
    :param target_file: file the new-side lines are tokenized against
    :returns: list of ``AttributeDict`` rows with ``original`` and
        ``modified`` entries
    """
    # TODO: dan: investigate doing the diff comparison and fast highlighting
    # on the entire before and after buffered block lines rather than by
    # line, this means we can get better 'fast' highlighting if the context
    # allows it - eg.
    # line 4: """
    # line 5: this gets highlighted as a string
    # line 6: """

    def pop_nonl_marker(buffered, marker_action):
        # Remove a trailing no-newline marker from `buffered` and return
        # its rendered text, or None when there is no such marker.
        if buffered and buffered[-1]['action'] == marker_action:
            marker = buffered.pop(-1)
            return '\n {}'.format(
                render_tokenstream([('nonl', '', marker['line'])]))
        return None

    before_newline = pop_nonl_marker(before_lines, 'old-no-nl')
    after_newline = pop_nonl_marker(after_lines, 'new-no-nl')

    before_count = len(before_lines)
    after_count = len(after_lines)
    lines = []

    # walk both buffers by index instead of pop(0), which is O(n) per call
    for idx in range(max(before_count, after_count)):
        before = before_lines[idx] if idx < before_count else None
        after = after_lines[idx] if idx < after_count else None
        before_tokens, after_tokens = None, None

        original = AttributeDict()
        modified = AttributeDict()

        if before:
            if before['action'] == 'old-no-nl':
                # marker that was not the last buffered line
                before_tokens = [('nonl', before['line'])]
            else:
                before_tokens = self.get_line_tokens(
                    line_text=before['line'],
                    line_number=before['old_lineno'],
                    file=source_file)
            original.lineno = before['old_lineno']
            original.content = before['line']
            original.action = self.action_to_op(before['action'])
            original.comments = self.get_comments_for(
                'old', source_file, before['old_lineno'])

        if after:
            if after['action'] == 'new-no-nl':
                # marker that was not the last buffered line
                after_tokens = [('nonl', after['line'])]
            else:
                after_tokens = self.get_line_tokens(
                    line_text=after['line'],
                    line_number=after['new_lineno'],
                    file=target_file)
            modified.lineno = after['new_lineno']
            modified.content = after['line']
            modified.action = self.action_to_op(after['action'])
            modified.comments = self.get_comments_for(
                'new', target_file, after['new_lineno'])

        # diff the lines
        if before_tokens and after_tokens:
            o_tokens, m_tokens, similarity = tokens_diff(
                before_tokens, after_tokens)
            original.content = render_tokenstream(o_tokens)
            modified.content = render_tokenstream(m_tokens)
        elif before_tokens:
            original.content = render_tokenstream(
                [(x[0], '', x[1]) for x in before_tokens])
        elif after_tokens:
            modified.content = render_tokenstream(
                [(x[0], '', x[1]) for x in after_tokens])

        # attach the no-newline marker to the last line of its side; the
        # `or ''` covers a side with no line left, where content is None
        if before_newline and idx >= before_count - 1:
            original.content = (original.content or '') + before_newline
            before_newline = None
        if after_newline and idx >= after_count - 1:
            modified.content = (modified.content or '') + after_newline
            after_newline = None

        lines.append(AttributeDict({
            'original': original,
            'modified': modified,
        }))

    # the buffers are consumed, exactly as the pop-based loop left them
    del before_lines[:]
    del after_lines[:]

    return lines
614
638
def get_comments_for(self, version, filename, line_number):
    """
    Take the comments stored for one line of one file out of the store.

    :param version: ``'old'`` or ``'new'``; selects the ``o`` / ``n``
        prefix of the line key (any other value raises ``KeyError``)
    :param filename: a path string, or an object exposing ``unicode_path``
    :param line_number: line number appended to the prefix to form the key
    :returns: the stored comments, removed from ``self.comments_store``;
        ``None`` when the filename is not a string or nothing is stored
    """
    if hasattr(filename, 'unicode_path'):
        filename = filename.unicode_path

    if not isinstance(filename, basestring):
        return None

    prefix = {'old': 'o', 'new': 'n'}[version]
    line_key = prefix + str(line_number)

    if filename not in self.comments_store:
        return None

    file_comments = self.comments_store[filename]
    if line_key not in file_comments:
        return None

    # pop, so a comment placed on a diff line is not handed out again
    return file_comments.pop(line_key)
631
655
def get_line_tokens(self, line_text, line_number, file=None):
    """
    Tokenize one diff line according to the configured highlight mode.

    In ``HL_REAL`` mode with a ``FileNode``, a known line number and a file
    below ``max_file_size_limit``, the tokens come from the tokenized file.
    Otherwise, in ``HL_REAL`` / ``HL_FAST`` mode with a known filename, the
    line text alone is tokenized with the lexer for that filename.  In every
    other case the plain text lexer is used.

    :param line_text: text of the line
    :param line_number: line number inside ``file``
    :param file: a filename string, a ``FileNode``, or ``None``
    :returns: list of tokens
    """
    filenode, filename = None, None
    if isinstance(file, basestring):
        filename = file
    elif isinstance(file, FileNode):
        filenode, filename = file, file.unicode_path

    mode = self.highlight_mode

    if mode == self.HL_REAL and filenode:
        lexer = self._get_lexer_for_filename(filename)
        small_enough = file.size < self.max_file_size_limit
        if line_number and small_enough:
            return self.get_tokenized_filenode_line(
                file, line_number, lexer)

    if mode in (self.HL_REAL, self.HL_FAST) and filename:
        lexer = self._get_lexer_for_filename(filename)
    else:
        lexer = plain_text_lexer

    return list(tokenize_string(line_text, lexer))
654
678
def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
    """
    Return the tokens of one line of ``filenode``.

    The file is tokenized with ``filenode_as_lines_tokens`` the first time
    it is requested and the result is kept in
    ``self.highlighted_filenodes``.

    :param filenode: node used as the cache key and tokenizer input
    :param line_number: 1-based line number
    :param lexer: lexer handed to ``filenode_as_lines_tokens``
    """
    cache = self.highlighted_filenodes
    if filenode not in cache:
        cache[filenode] = filenode_as_lines_tokens(filenode, lexer)
    return cache[filenode][line_number - 1]
661
685
def action_to_op(self, action):
    """
    Map a parsed line action to its one-character diff operator.

    ``add`` becomes ``+``, ``del`` becomes ``-``; ``unmod`` and both
    no-newline markers become a single space.  Any other value is returned
    unchanged.
    """
    ops = dict.fromkeys(('unmod', 'old-no-nl', 'new-no-nl'), ' ')
    ops['add'] = '+'
    ops['del'] = '-'

    if action in ops:
        return ops[action]
    return action
670
694
def as_unified(self, lines):
    """
    Return a generator that yields the lines of a diff in unified order

    Each yielded item is a tuple
    ``(old_lineno, new_lineno, action, content, comments)``.  Additions
    that share a row with a deletion are held back and emitted after the
    deletions of the same run.
    """
    def unified_rows():
        pending_adds = []
        for row in lines:
            old, new = row.original, row.modified

            # flush the held-back additions once the run of paired
            # changes ends
            if (pending_adds and not old) or old.action == ' ':
                for queued in pending_adds:
                    yield queued
                pending_adds = []

            if old:
                if old.action == ' ':
                    yield (old.lineno, new.lineno,
                           old.action, old.content,
                           old.comments)
                    continue

                if old.action == '-':
                    yield (old.lineno, None,
                           old.action, old.content,
                           old.comments)

                if new.action == '+':
                    pending_adds.append((
                        None, new.lineno,
                        new.action, new.content,
                        new.comments))
                    continue

            if new:
                yield (None, new.lineno,
                       new.action, new.content,
                       new.comments)

        for queued in pending_adds:
            yield queued

    return unified_rows()
@@ -1,1107 +1,1107
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2017 RhodeCode GmbH
3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import re
26 import re
27 import collections
27 import collections
28 import difflib
28 import difflib
29 import logging
29 import logging
30
30
31 from itertools import tee, imap
31 from itertools import tee, imap
32
32
33 from rhodecode.lib.vcs.exceptions import VCSError
33 from rhodecode.lib.vcs.exceptions import VCSError
34 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
34 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
35 from rhodecode.lib.utils2 import safe_unicode
35 from rhodecode.lib.utils2 import safe_unicode
36
36
37 log = logging.getLogger(__name__)
37 log = logging.getLogger(__name__)
38
38
39 # define max context, a file with more than this number of lines is unusable
39 # define max context, a file with more than this number of lines is unusable
40 # in browser anyway
40 # in browser anyway
41 MAX_CONTEXT = 1024 * 1014
41 MAX_CONTEXT = 1024 * 1014
42
42
43
43
class OPS(object):
    """
    Single-letter codes for the operation applied to a file in a diff:
    added, modified or deleted.
    """

    ADD, MOD, DEL = 'A', 'M', 'D'
48
48
49
49
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` of the old version; when its commit is
        falsy the repository's ``EMPTY_COMMIT`` is used instead
    :param filenode_new: ``FileNode`` of the new version; its commit's
        repository computes the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value means 3 and the
        value is capped at ``MAX_CONTEXT``
    :returns: ``''`` when either node is a ``SubModuleNode``, otherwise the
        result of ``repo.get_diff``
    :raises VCSError: when either node is not a ``FileNode``
    """
    # make sure we pass in default context, and protect against IntOverflow
    # when passing HUGE context
    context = min(context or 3, MAX_CONTEXT)

    # any() instead of testing the result of filter(): a filter object is
    # always truthy on Python 3, which would make every call return ''
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    vcs_gitdiff = repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
    return vcs_gitdiff
81
81
# Identifiers for the kinds of per-file operation; used as keys of the
# ``stats['ops']`` mapping built while parsing a diff.
(
    NEW_FILENODE,
    DEL_FILENODE,
    MOD_FILENODE,
    RENAMED_FILENODE,
    COPIED_FILENODE,
    CHMOD_FILENODE,
    BIN_FILENODE,
) = range(1, 8)
89
89
90
90
class LimitedDiffContainer(object):
    """
    Wrapper around a diff that also records the size limit and the diff
    size reached.  Item access and iteration are delegated to the wrapped
    diff.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        for item in self.diff:
            yield item
104
104
105
105
class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    # line added on the new side
    ADD = 'add'
    # line removed from the old side
    DELETE = 'del'
    # line present on both sides
    UNMODIFIED = 'unmod'

    # NOTE(review): usage of CONTEXT is not visible in this part of the file
    CONTEXT = 'context'
    # "\ No newline at end of file" marker for the old / new side
    OLD_NO_NL = 'old-no-nl'
    NEW_NO_NL = 'new-no-nl'
118
118
119
119
120 class DiffProcessor(object):
120 class DiffProcessor(object):
121 """
121 """
122 Give it a unified or git diff and it returns a list of the files that were
122 Give it a unified or git diff and it returns a list of the files that were
123 mentioned in the diff together with a dict of meta information that
123 mentioned in the diff together with a dict of meta information that
124 can be used to render it in a HTML template.
124 can be used to render it in a HTML template.
125
125
126 .. note:: Unicode handling
126 .. note:: Unicode handling
127
127
128 The original diffs are a byte sequence and can contain filenames
128 The original diffs are a byte sequence and can contain filenames
129 in mixed encodings. This class generally returns `unicode` objects
129 in mixed encodings. This class generally returns `unicode` objects
130 since the result is intended for presentation to the user.
130 since the result is intended for presentation to the user.
131
131
132 """
132 """
133 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
133 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
134 _newline_marker = re.compile(r'^\\ No newline at end of file')
134 _newline_marker = re.compile(r'^\\ No newline at end of file')
135
135
136 # used for inline highlighter word split
136 # used for inline highlighter word split
137 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
137 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
138
138
139 # collapse ranges of commits over given number
139 # collapse ranges of commits over given number
140 _collapse_commits_over = 5
140 _collapse_commits_over = 5
141
141
def __init__(self, diff, format='gitdiff', diff_limit=None,
             file_limit=None, show_full_diff=True):
    """
    :param diff: A `Diff` object representing a diff from a vcs backend
    :param format: format of diff passed, `udiff` or `gitdiff`
    :param diff_limit: define the size of diff that is considered "big"
        based on that parameter cut off will be triggered, set to None
        to show full diff
    :param file_limit: stored on the instance as ``file_limit``
    :param show_full_diff: stored on the instance as ``show_full_diff``
    """
    self._diff = diff
    self._format = format

    # running totals of added / removed lines
    self.adds = 0
    self.removes = 0

    # limits and the running diff size they are checked against
    self.diff_limit = diff_limit
    self.file_limit = file_limit
    self.show_full_diff = show_full_diff
    self.cur_diff_size = 0

    # parsing state
    self.parsed = False
    self.parsed_diff = []

    log.debug('Initialized DiffProcessor with %s mode', format)

    # pick the inline highlighter and the parser for the given format
    if format == 'gitdiff':
        differ, parser = self._highlight_line_difflib, self._parse_gitdiff
    else:
        differ, parser = self._highlight_line_udiff, self._new_parse_gitdiff
    self.differ = differ
    self._parser = parser
170
170
171 def _copy_iterator(self):
171 def _copy_iterator(self):
172 """
172 """
173 make a fresh copy of generator, we should not iterate thru
173 make a fresh copy of generator, we should not iterate thru
174 an original as it's needed for repeating operations on
174 an original as it's needed for repeating operations on
175 this instance of DiffProcessor
175 this instance of DiffProcessor
176 """
176 """
177 self.__udiff, iterator_copy = tee(self.__udiff)
177 self.__udiff, iterator_copy = tee(self.__udiff)
178 return iterator_copy
178 return iterator_copy
179
179
def _escaper(self, string):
    """
    Escaper for diff escapes special chars and checks the diff limit

    Adds the length of ``string`` to ``self.cur_diff_size`` on every call.

    :param string: one raw line of the diff
    :returns: the line passed through ``safe_unicode`` with ``&``, ``<``
        and ``>`` replaced by HTML entities
    :raises DiffLimitExceeded: when ``show_full_diff`` is off, a
        ``diff_limit`` is set and the accumulated size exceeds it
    """
    self.cur_diff_size += len(string)

    # diff_limit=None means "no limit"; without this check the comparison
    # against None is always true on Python 2 and a TypeError on Python 3
    limit_active = not self.show_full_diff and self.diff_limit is not None
    if limit_active and self.cur_diff_size > self.diff_limit:
        raise DiffLimitExceeded('Diff Limit Exceeded')

    # '&' must be replaced first so the entities added below stay intact
    return safe_unicode(string)\
        .replace('&', '&amp;')\
        .replace('<', '&lt;')\
        .replace('>', '&gt;')
196
196
def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    Lines starting with ``+++`` or ``---`` are not counted.

    :param l: one line of the diff
    :returns: the line passed through ``safe_unicode``
    """
    is_added = l.startswith('+') and not l.startswith('+++')
    is_removed = l.startswith('-') and not l.startswith('---')

    if is_added:
        self.adds += 1
    elif is_removed:
        self.removes += 1

    return safe_unicode(l)
208
208
def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines.

    Both line texts are split with ``self._token_re`` and compared with
    ``difflib.SequenceMatcher``.  Fragments that differ are wrapped in
    ``<del>`` (old line) or ``<ins>`` (new line), and the ``'line'`` entry
    of both mappings is rewritten in place.

    :param line: parsed line mapping; it is the old line when its action is
        ``Action.DELETE``, otherwise it is the new line
    :param next_: the other line of the pair
    """
    is_deletion = line['action'] == Action.DELETE
    old, new = (line, next_) if is_deletion else (next_, line)

    old_words = self._token_re.split(old['line'])
    new_words = self._token_re.split(new['line'])
    matcher = difflib.SequenceMatcher(None, old_words, new_words)

    old_parts, new_parts = [], []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        old_part = ''.join(old_words[i1:i2])
        new_part = ''.join(new_words[j1:j2])
        changed = tag != 'equal'
        if changed and old_part:
            old_part = '<del>%s</del>' % old_part
        if changed and new_part:
            new_part = '<ins>%s</ins>' % new_part
        old_parts.append(old_part)
        new_parts.append(new_part)

    old['line'] = "".join(old_parts)
    new['line'] = "".join(new_parts)
237
237
def _highlight_line_udiff(self, line, next_):
    """
    Highlight inline changes in both lines.

    Finds the prefix and suffix the two line texts have in common and wraps
    what lies between them in ``<ins>`` (for ``Action.ADD`` lines) or
    ``<del>`` (for all others).  The ``'line'`` entry of both mappings is
    rewritten in place; nothing changes when no common prefix or suffix is
    found.
    """
    first, second = line['line'], next_['line']
    shortest = min(len(first), len(second))

    # length of the common prefix
    prefix = 0
    while prefix < shortest and first[prefix] == second[prefix]:
        prefix += 1

    # common suffix, kept as a negative offset from the end (0 = none)
    suffix = -1
    remaining = shortest - prefix
    while -suffix <= remaining and first[suffix] == second[suffix]:
        suffix -= 1
    suffix += 1

    if not (prefix or suffix):
        return

    for entry in (line, next_):
        text = entry['line']
        cut = suffix + len(text)
        tag = 'ins' if entry['action'] == Action.ADD else 'del'
        entry['line'] = '%s<%s>%s</%s>%s' % (
            text[:prefix],
            tag,
            text[prefix:cut],
            tag,
            text[cut:]
        )
267
267
268 def _clean_line(self, line, command):
268 def _clean_line(self, line, command):
269 if command in ['+', '-', ' ']:
269 if command in ['+', '-', ' ']:
270 # only modify the line if it's actually a diff thing
270 # only modify the line if it's actually a diff thing
271 line = line[1:]
271 line = line[1:]
272 return line
272 return line
273
273
274 def _parse_gitdiff(self, inline_diff=True):
274 def _parse_gitdiff(self, inline_diff=True):
275 _files = []
275 _files = []
276 diff_container = lambda arg: arg
276 diff_container = lambda arg: arg
277
277
278 for chunk in self._diff.chunks():
278 for chunk in self._diff.chunks():
279 head = chunk.header
279 head = chunk.header
280
280
281 diff = imap(self._escaper, chunk.diff.splitlines(1))
281 diff = imap(self._escaper, chunk.diff.splitlines(1))
282 raw_diff = chunk.raw
282 raw_diff = chunk.raw
283 limited_diff = False
283 limited_diff = False
284 exceeds_limit = False
284 exceeds_limit = False
285
285
286 op = None
286 op = None
287 stats = {
287 stats = {
288 'added': 0,
288 'added': 0,
289 'deleted': 0,
289 'deleted': 0,
290 'binary': False,
290 'binary': False,
291 'ops': {},
291 'ops': {},
292 }
292 }
293
293
294 if head['deleted_file_mode']:
294 if head['deleted_file_mode']:
295 op = OPS.DEL
295 op = OPS.DEL
296 stats['binary'] = True
296 stats['binary'] = True
297 stats['ops'][DEL_FILENODE] = 'deleted file'
297 stats['ops'][DEL_FILENODE] = 'deleted file'
298
298
299 elif head['new_file_mode']:
299 elif head['new_file_mode']:
300 op = OPS.ADD
300 op = OPS.ADD
301 stats['binary'] = True
301 stats['binary'] = True
302 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
302 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
303 else: # modify operation, can be copy, rename or chmod
303 else: # modify operation, can be copy, rename or chmod
304
304
305 # CHMOD
305 # CHMOD
306 if head['new_mode'] and head['old_mode']:
306 if head['new_mode'] and head['old_mode']:
307 op = OPS.MOD
307 op = OPS.MOD
308 stats['binary'] = True
308 stats['binary'] = True
309 stats['ops'][CHMOD_FILENODE] = (
309 stats['ops'][CHMOD_FILENODE] = (
310 'modified file chmod %s => %s' % (
310 'modified file chmod %s => %s' % (
311 head['old_mode'], head['new_mode']))
311 head['old_mode'], head['new_mode']))
312 # RENAME
312 # RENAME
313 if head['rename_from'] != head['rename_to']:
313 if head['rename_from'] != head['rename_to']:
314 op = OPS.MOD
314 op = OPS.MOD
315 stats['binary'] = True
315 stats['binary'] = True
316 stats['ops'][RENAMED_FILENODE] = (
316 stats['ops'][RENAMED_FILENODE] = (
317 'file renamed from %s to %s' % (
317 'file renamed from %s to %s' % (
318 head['rename_from'], head['rename_to']))
318 head['rename_from'], head['rename_to']))
319 # COPY
319 # COPY
320 if head.get('copy_from') and head.get('copy_to'):
320 if head.get('copy_from') and head.get('copy_to'):
321 op = OPS.MOD
321 op = OPS.MOD
322 stats['binary'] = True
322 stats['binary'] = True
323 stats['ops'][COPIED_FILENODE] = (
323 stats['ops'][COPIED_FILENODE] = (
324 'file copied from %s to %s' % (
324 'file copied from %s to %s' % (
325 head['copy_from'], head['copy_to']))
325 head['copy_from'], head['copy_to']))
326
326
327 # If our new parsed headers didn't match anything fallback to
327 # If our new parsed headers didn't match anything fallback to
328 # old style detection
328 # old style detection
329 if op is None:
329 if op is None:
330 if not head['a_file'] and head['b_file']:
330 if not head['a_file'] and head['b_file']:
331 op = OPS.ADD
331 op = OPS.ADD
332 stats['binary'] = True
332 stats['binary'] = True
333 stats['ops'][NEW_FILENODE] = 'new file'
333 stats['ops'][NEW_FILENODE] = 'new file'
334
334
335 elif head['a_file'] and not head['b_file']:
335 elif head['a_file'] and not head['b_file']:
336 op = OPS.DEL
336 op = OPS.DEL
337 stats['binary'] = True
337 stats['binary'] = True
338 stats['ops'][DEL_FILENODE] = 'deleted file'
338 stats['ops'][DEL_FILENODE] = 'deleted file'
339
339
340 # it's not ADD not DELETE
340 # it's not ADD not DELETE
341 if op is None:
341 if op is None:
342 op = OPS.MOD
342 op = OPS.MOD
343 stats['binary'] = True
343 stats['binary'] = True
344 stats['ops'][MOD_FILENODE] = 'modified file'
344 stats['ops'][MOD_FILENODE] = 'modified file'
345
345
346 # a real non-binary diff
346 # a real non-binary diff
347 if head['a_file'] or head['b_file']:
347 if head['a_file'] or head['b_file']:
348 try:
348 try:
349 raw_diff, chunks, _stats = self._parse_lines(diff)
349 raw_diff, chunks, _stats = self._parse_lines(diff)
350 stats['binary'] = False
350 stats['binary'] = False
351 stats['added'] = _stats[0]
351 stats['added'] = _stats[0]
352 stats['deleted'] = _stats[1]
352 stats['deleted'] = _stats[1]
353 # explicit mark that it's a modified file
353 # explicit mark that it's a modified file
354 if op == OPS.MOD:
354 if op == OPS.MOD:
355 stats['ops'][MOD_FILENODE] = 'modified file'
355 stats['ops'][MOD_FILENODE] = 'modified file'
356 exceeds_limit = len(raw_diff) > self.file_limit
356 exceeds_limit = len(raw_diff) > self.file_limit
357
357
358 # changed from _escaper function so we validate size of
358 # changed from _escaper function so we validate size of
359 # each file instead of the whole diff
359 # each file instead of the whole diff
360 # diff will hide big files but still show small ones
360 # diff will hide big files but still show small ones
361 # from my tests, big files are fairly safe to be parsed
361 # from my tests, big files are fairly safe to be parsed
362 # but the browser is the bottleneck
362 # but the browser is the bottleneck
363 if not self.show_full_diff and exceeds_limit:
363 if not self.show_full_diff and exceeds_limit:
364 raise DiffLimitExceeded('File Limit Exceeded')
364 raise DiffLimitExceeded('File Limit Exceeded')
365
365
366 except DiffLimitExceeded:
366 except DiffLimitExceeded:
367 diff_container = lambda _diff: \
367 diff_container = lambda _diff: \
368 LimitedDiffContainer(
368 LimitedDiffContainer(
369 self.diff_limit, self.cur_diff_size, _diff)
369 self.diff_limit, self.cur_diff_size, _diff)
370
370
371 exceeds_limit = len(raw_diff) > self.file_limit
371 exceeds_limit = len(raw_diff) > self.file_limit
372 limited_diff = True
372 limited_diff = True
373 chunks = []
373 chunks = []
374
374
375 else: # GIT format binary patch, or possibly empty diff
375 else: # GIT format binary patch, or possibly empty diff
376 if head['bin_patch']:
376 if head['bin_patch']:
377 # we have operation already extracted, but we mark simply
377 # we have operation already extracted, but we mark simply
378 # it's a diff we wont show for binary files
378 # it's a diff we wont show for binary files
379 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
379 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
380 chunks = []
380 chunks = []
381
381
382 if chunks and not self.show_full_diff and op == OPS.DEL:
382 if chunks and not self.show_full_diff and op == OPS.DEL:
383 # if not full diff mode show deleted file contents
383 # if not full diff mode show deleted file contents
384 # TODO: anderson: if the view is not too big, there is no way
384 # TODO: anderson: if the view is not too big, there is no way
385 # to see the content of the file
385 # to see the content of the file
386 chunks = []
386 chunks = []
387
387
388 chunks.insert(0, [{
388 chunks.insert(0, [{
389 'old_lineno': '',
389 'old_lineno': '',
390 'new_lineno': '',
390 'new_lineno': '',
391 'action': Action.CONTEXT,
391 'action': Action.CONTEXT,
392 'line': msg,
392 'line': msg,
393 } for _op, msg in stats['ops'].iteritems()
393 } for _op, msg in stats['ops'].iteritems()
394 if _op not in [MOD_FILENODE]])
394 if _op not in [MOD_FILENODE]])
395
395
396 _files.append({
396 _files.append({
397 'filename': safe_unicode(head['b_path']),
397 'filename': safe_unicode(head['b_path']),
398 'old_revision': head['a_blob_id'],
398 'old_revision': head['a_blob_id'],
399 'new_revision': head['b_blob_id'],
399 'new_revision': head['b_blob_id'],
400 'chunks': chunks,
400 'chunks': chunks,
401 'raw_diff': safe_unicode(raw_diff),
401 'raw_diff': safe_unicode(raw_diff),
402 'operation': op,
402 'operation': op,
403 'stats': stats,
403 'stats': stats,
404 'exceeds_limit': exceeds_limit,
404 'exceeds_limit': exceeds_limit,
405 'is_limited_diff': limited_diff,
405 'is_limited_diff': limited_diff,
406 })
406 })
407
407
408 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
408 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
409 OPS.DEL: 2}.get(info['operation'])
409 OPS.DEL: 2}.get(info['operation'])
410
410
411 if not inline_diff:
411 if not inline_diff:
412 return diff_container(sorted(_files, key=sorter))
412 return diff_container(sorted(_files, key=sorter))
413
413
414 # highlight inline changes
414 # highlight inline changes
415 for diff_data in _files:
415 for diff_data in _files:
416 for chunk in diff_data['chunks']:
416 for chunk in diff_data['chunks']:
417 lineiter = iter(chunk)
417 lineiter = iter(chunk)
418 try:
418 try:
419 while 1:
419 while 1:
420 line = lineiter.next()
420 line = lineiter.next()
421 if line['action'] not in (
421 if line['action'] not in (
422 Action.UNMODIFIED, Action.CONTEXT):
422 Action.UNMODIFIED, Action.CONTEXT):
423 nextline = lineiter.next()
423 nextline = lineiter.next()
424 if nextline['action'] in ['unmod', 'context'] or \
424 if nextline['action'] in ['unmod', 'context'] or \
425 nextline['action'] == line['action']:
425 nextline['action'] == line['action']:
426 continue
426 continue
427 self.differ(line, nextline)
427 self.differ(line, nextline)
428 except StopIteration:
428 except StopIteration:
429 pass
429 pass
430
430
431 return diff_container(sorted(_files, key=sorter))
431 return diff_container(sorted(_files, key=sorter))
432
432
433 def _check_large_diff(self):
433 def _check_large_diff(self):
434 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
434 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
435 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
435 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
436 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
436 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
437
437
438 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
438 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
439 def _new_parse_gitdiff(self, inline_diff=True):
439 def _new_parse_gitdiff(self, inline_diff=True):
440 _files = []
440 _files = []
441
441
442 # this can be overriden later to a LimitedDiffContainer type
442 # this can be overriden later to a LimitedDiffContainer type
443 diff_container = lambda arg: arg
443 diff_container = lambda arg: arg
444
444
445 for chunk in self._diff.chunks():
445 for chunk in self._diff.chunks():
446 head = chunk.header
446 head = chunk.header
447 log.debug('parsing diff %r' % head)
447 log.debug('parsing diff %r' % head)
448
448
449 raw_diff = chunk.raw
449 raw_diff = chunk.raw
450 limited_diff = False
450 limited_diff = False
451 exceeds_limit = False
451 exceeds_limit = False
452
452
453 op = None
453 op = None
454 stats = {
454 stats = {
455 'added': 0,
455 'added': 0,
456 'deleted': 0,
456 'deleted': 0,
457 'binary': False,
457 'binary': False,
458 'old_mode': None,
458 'old_mode': None,
459 'new_mode': None,
459 'new_mode': None,
460 'ops': {},
460 'ops': {},
461 }
461 }
462 if head['old_mode']:
462 if head['old_mode']:
463 stats['old_mode'] = head['old_mode']
463 stats['old_mode'] = head['old_mode']
464 if head['new_mode']:
464 if head['new_mode']:
465 stats['new_mode'] = head['new_mode']
465 stats['new_mode'] = head['new_mode']
466 if head['b_mode']:
466 if head['b_mode']:
467 stats['new_mode'] = head['b_mode']
467 stats['new_mode'] = head['b_mode']
468
468
469 # delete file
469 # delete file
470 if head['deleted_file_mode']:
470 if head['deleted_file_mode']:
471 op = OPS.DEL
471 op = OPS.DEL
472 stats['binary'] = True
472 stats['binary'] = True
473 stats['ops'][DEL_FILENODE] = 'deleted file'
473 stats['ops'][DEL_FILENODE] = 'deleted file'
474
474
475 # new file
475 # new file
476 elif head['new_file_mode']:
476 elif head['new_file_mode']:
477 op = OPS.ADD
477 op = OPS.ADD
478 stats['binary'] = True
478 stats['binary'] = True
479 stats['old_mode'] = None
479 stats['old_mode'] = None
480 stats['new_mode'] = head['new_file_mode']
480 stats['new_mode'] = head['new_file_mode']
481 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
481 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
482
482
483 # modify operation, can be copy, rename or chmod
483 # modify operation, can be copy, rename or chmod
484 else:
484 else:
485 # CHMOD
485 # CHMOD
486 if head['new_mode'] and head['old_mode']:
486 if head['new_mode'] and head['old_mode']:
487 op = OPS.MOD
487 op = OPS.MOD
488 stats['binary'] = True
488 stats['binary'] = True
489 stats['ops'][CHMOD_FILENODE] = (
489 stats['ops'][CHMOD_FILENODE] = (
490 'modified file chmod %s => %s' % (
490 'modified file chmod %s => %s' % (
491 head['old_mode'], head['new_mode']))
491 head['old_mode'], head['new_mode']))
492
492
493 # RENAME
493 # RENAME
494 if head['rename_from'] != head['rename_to']:
494 if head['rename_from'] != head['rename_to']:
495 op = OPS.MOD
495 op = OPS.MOD
496 stats['binary'] = True
496 stats['binary'] = True
497 stats['renamed'] = (head['rename_from'], head['rename_to'])
497 stats['renamed'] = (head['rename_from'], head['rename_to'])
498 stats['ops'][RENAMED_FILENODE] = (
498 stats['ops'][RENAMED_FILENODE] = (
499 'file renamed from %s to %s' % (
499 'file renamed from %s to %s' % (
500 head['rename_from'], head['rename_to']))
500 head['rename_from'], head['rename_to']))
501 # COPY
501 # COPY
502 if head.get('copy_from') and head.get('copy_to'):
502 if head.get('copy_from') and head.get('copy_to'):
503 op = OPS.MOD
503 op = OPS.MOD
504 stats['binary'] = True
504 stats['binary'] = True
505 stats['copied'] = (head['copy_from'], head['copy_to'])
505 stats['copied'] = (head['copy_from'], head['copy_to'])
506 stats['ops'][COPIED_FILENODE] = (
506 stats['ops'][COPIED_FILENODE] = (
507 'file copied from %s to %s' % (
507 'file copied from %s to %s' % (
508 head['copy_from'], head['copy_to']))
508 head['copy_from'], head['copy_to']))
509
509
510 # If our new parsed headers didn't match anything fallback to
510 # If our new parsed headers didn't match anything fallback to
511 # old style detection
511 # old style detection
512 if op is None:
512 if op is None:
513 if not head['a_file'] and head['b_file']:
513 if not head['a_file'] and head['b_file']:
514 op = OPS.ADD
514 op = OPS.ADD
515 stats['binary'] = True
515 stats['binary'] = True
516 stats['new_file'] = True
516 stats['new_file'] = True
517 stats['ops'][NEW_FILENODE] = 'new file'
517 stats['ops'][NEW_FILENODE] = 'new file'
518
518
519 elif head['a_file'] and not head['b_file']:
519 elif head['a_file'] and not head['b_file']:
520 op = OPS.DEL
520 op = OPS.DEL
521 stats['binary'] = True
521 stats['binary'] = True
522 stats['ops'][DEL_FILENODE] = 'deleted file'
522 stats['ops'][DEL_FILENODE] = 'deleted file'
523
523
524 # it's not ADD not DELETE
524 # it's not ADD not DELETE
525 if op is None:
525 if op is None:
526 op = OPS.MOD
526 op = OPS.MOD
527 stats['binary'] = True
527 stats['binary'] = True
528 stats['ops'][MOD_FILENODE] = 'modified file'
528 stats['ops'][MOD_FILENODE] = 'modified file'
529
529
530 # a real non-binary diff
530 # a real non-binary diff
531 if head['a_file'] or head['b_file']:
531 if head['a_file'] or head['b_file']:
532 diff = iter(chunk.diff.splitlines(1))
532 diff = iter(chunk.diff.splitlines(1))
533
533
534 # append each file to the diff size
534 # append each file to the diff size
535 raw_chunk_size = len(raw_diff)
535 raw_chunk_size = len(raw_diff)
536
536
537 exceeds_limit = raw_chunk_size > self.file_limit
537 exceeds_limit = raw_chunk_size > self.file_limit
538 self.cur_diff_size += raw_chunk_size
538 self.cur_diff_size += raw_chunk_size
539
539
540 try:
540 try:
541 # Check each file instead of the whole diff.
541 # Check each file instead of the whole diff.
542 # Diff will hide big files but still show small ones.
542 # Diff will hide big files but still show small ones.
543 # From the tests big files are fairly safe to be parsed
543 # From the tests big files are fairly safe to be parsed
544 # but the browser is the bottleneck.
544 # but the browser is the bottleneck.
545 if not self.show_full_diff and exceeds_limit:
545 if not self.show_full_diff and exceeds_limit:
546 log.debug('File `%s` exceeds current file_limit of %s',
546 log.debug('File `%s` exceeds current file_limit of %s',
547 safe_unicode(head['b_path']), self.file_limit)
547 safe_unicode(head['b_path']), self.file_limit)
548 raise DiffLimitExceeded(
548 raise DiffLimitExceeded(
549 'File Limit %s Exceeded', self.file_limit)
549 'File Limit %s Exceeded', self.file_limit)
550
550
551 self._check_large_diff()
551 self._check_large_diff()
552
552
553 raw_diff, chunks, _stats = self._new_parse_lines(diff)
553 raw_diff, chunks, _stats = self._new_parse_lines(diff)
554 stats['binary'] = False
554 stats['binary'] = False
555 stats['added'] = _stats[0]
555 stats['added'] = _stats[0]
556 stats['deleted'] = _stats[1]
556 stats['deleted'] = _stats[1]
557 # explicit mark that it's a modified file
557 # explicit mark that it's a modified file
558 if op == OPS.MOD:
558 if op == OPS.MOD:
559 stats['ops'][MOD_FILENODE] = 'modified file'
559 stats['ops'][MOD_FILENODE] = 'modified file'
560
560
561 except DiffLimitExceeded:
561 except DiffLimitExceeded:
562 diff_container = lambda _diff: \
562 diff_container = lambda _diff: \
563 LimitedDiffContainer(
563 LimitedDiffContainer(
564 self.diff_limit, self.cur_diff_size, _diff)
564 self.diff_limit, self.cur_diff_size, _diff)
565
565
566 limited_diff = True
566 limited_diff = True
567 chunks = []
567 chunks = []
568
568
569 else: # GIT format binary patch, or possibly empty diff
569 else: # GIT format binary patch, or possibly empty diff
570 if head['bin_patch']:
570 if head['bin_patch']:
571 # we have operation already extracted, but we mark simply
571 # we have operation already extracted, but we mark simply
572 # it's a diff we wont show for binary files
572 # it's a diff we wont show for binary files
573 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
573 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
574 chunks = []
574 chunks = []
575
575
576 # Hide content of deleted node by setting empty chunks
576 # Hide content of deleted node by setting empty chunks
577 if chunks and not self.show_full_diff and op == OPS.DEL:
577 if chunks and not self.show_full_diff and op == OPS.DEL:
578 # if not full diff mode show deleted file contents
578 # if not full diff mode show deleted file contents
579 # TODO: anderson: if the view is not too big, there is no way
579 # TODO: anderson: if the view is not too big, there is no way
580 # to see the content of the file
580 # to see the content of the file
581 chunks = []
581 chunks = []
582
582
583 chunks.insert(
583 chunks.insert(
584 0, [{'old_lineno': '',
584 0, [{'old_lineno': '',
585 'new_lineno': '',
585 'new_lineno': '',
586 'action': Action.CONTEXT,
586 'action': Action.CONTEXT,
587 'line': msg,
587 'line': msg,
588 } for _op, msg in stats['ops'].iteritems()
588 } for _op, msg in stats['ops'].iteritems()
589 if _op not in [MOD_FILENODE]])
589 if _op not in [MOD_FILENODE]])
590
590
591 original_filename = safe_unicode(head['a_path'])
591 original_filename = safe_unicode(head['a_path'])
592 _files.append({
592 _files.append({
593 'original_filename': original_filename,
593 'original_filename': original_filename,
594 'filename': safe_unicode(head['b_path']),
594 'filename': safe_unicode(head['b_path']),
595 'old_revision': head['a_blob_id'],
595 'old_revision': head['a_blob_id'],
596 'new_revision': head['b_blob_id'],
596 'new_revision': head['b_blob_id'],
597 'chunks': chunks,
597 'chunks': chunks,
598 'raw_diff': safe_unicode(raw_diff),
598 'raw_diff': safe_unicode(raw_diff),
599 'operation': op,
599 'operation': op,
600 'stats': stats,
600 'stats': stats,
601 'exceeds_limit': exceeds_limit,
601 'exceeds_limit': exceeds_limit,
602 'is_limited_diff': limited_diff,
602 'is_limited_diff': limited_diff,
603 })
603 })
604
604
605 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
605 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
606 OPS.DEL: 2}.get(info['operation'])
606 OPS.DEL: 2}.get(info['operation'])
607
607
608 return diff_container(sorted(_files, key=sorter))
608 return diff_container(sorted(_files, key=sorter))
609
609
610 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
610 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
611 def _parse_lines(self, diff):
611 def _parse_lines(self, diff):
612 """
612 """
613 Parse the diff an return data for the template.
613 Parse the diff an return data for the template.
614 """
614 """
615
615
616 lineiter = iter(diff)
616 lineiter = iter(diff)
617 stats = [0, 0]
617 stats = [0, 0]
618 chunks = []
618 chunks = []
619 raw_diff = []
619 raw_diff = []
620
620
621 try:
621 try:
622 line = lineiter.next()
622 line = lineiter.next()
623
623
624 while line:
624 while line:
625 raw_diff.append(line)
625 raw_diff.append(line)
626 lines = []
626 lines = []
627 chunks.append(lines)
627 chunks.append(lines)
628
628
629 match = self._chunk_re.match(line)
629 match = self._chunk_re.match(line)
630
630
631 if not match:
631 if not match:
632 break
632 break
633
633
634 gr = match.groups()
634 gr = match.groups()
635 (old_line, old_end,
635 (old_line, old_end,
636 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
636 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
637 old_line -= 1
637 old_line -= 1
638 new_line -= 1
638 new_line -= 1
639
639
640 context = len(gr) == 5
640 context = len(gr) == 5
641 old_end += old_line
641 old_end += old_line
642 new_end += new_line
642 new_end += new_line
643
643
644 if context:
644 if context:
645 # skip context only if it's first line
645 # skip context only if it's first line
646 if int(gr[0]) > 1:
646 if int(gr[0]) > 1:
647 lines.append({
647 lines.append({
648 'old_lineno': '...',
648 'old_lineno': '...',
649 'new_lineno': '...',
649 'new_lineno': '...',
650 'action': Action.CONTEXT,
650 'action': Action.CONTEXT,
651 'line': line,
651 'line': line,
652 })
652 })
653
653
654 line = lineiter.next()
654 line = lineiter.next()
655
655
656 while old_line < old_end or new_line < new_end:
656 while old_line < old_end or new_line < new_end:
657 command = ' '
657 command = ' '
658 if line:
658 if line:
659 command = line[0]
659 command = line[0]
660
660
661 affects_old = affects_new = False
661 affects_old = affects_new = False
662
662
663 # ignore those if we don't expect them
663 # ignore those if we don't expect them
664 if command in '#@':
664 if command in '#@':
665 continue
665 continue
666 elif command == '+':
666 elif command == '+':
667 affects_new = True
667 affects_new = True
668 action = Action.ADD
668 action = Action.ADD
669 stats[0] += 1
669 stats[0] += 1
670 elif command == '-':
670 elif command == '-':
671 affects_old = True
671 affects_old = True
672 action = Action.DELETE
672 action = Action.DELETE
673 stats[1] += 1
673 stats[1] += 1
674 else:
674 else:
675 affects_old = affects_new = True
675 affects_old = affects_new = True
676 action = Action.UNMODIFIED
676 action = Action.UNMODIFIED
677
677
678 if not self._newline_marker.match(line):
678 if not self._newline_marker.match(line):
679 old_line += affects_old
679 old_line += affects_old
680 new_line += affects_new
680 new_line += affects_new
681 lines.append({
681 lines.append({
682 'old_lineno': affects_old and old_line or '',
682 'old_lineno': affects_old and old_line or '',
683 'new_lineno': affects_new and new_line or '',
683 'new_lineno': affects_new and new_line or '',
684 'action': action,
684 'action': action,
685 'line': self._clean_line(line, command)
685 'line': self._clean_line(line, command)
686 })
686 })
687 raw_diff.append(line)
687 raw_diff.append(line)
688
688
689 line = lineiter.next()
689 line = lineiter.next()
690
690
691 if self._newline_marker.match(line):
691 if self._newline_marker.match(line):
692 # we need to append to lines, since this is not
692 # we need to append to lines, since this is not
693 # counted in the line specs of diff
693 # counted in the line specs of diff
694 lines.append({
694 lines.append({
695 'old_lineno': '...',
695 'old_lineno': '...',
696 'new_lineno': '...',
696 'new_lineno': '...',
697 'action': Action.CONTEXT,
697 'action': Action.CONTEXT,
698 'line': self._clean_line(line, command)
698 'line': self._clean_line(line, command)
699 })
699 })
700
700
701 except StopIteration:
701 except StopIteration:
702 pass
702 pass
703 return ''.join(raw_diff), chunks, stats
703 return ''.join(raw_diff), chunks, stats
704
704
705 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
705 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
def _new_parse_lines(self, diff_iter):
    """
    Parse the hunks of a single file diff and return data for the
    template.

    :param diff_iter: iterator of diff lines, starting at the first
        hunk header; every line is passed through ``safe_unicode``.
    :return: tuple ``(raw_diff, chunks, stats)`` where ``raw_diff`` is
        the joined text of the collected lines, ``chunks`` is a list of
        hunk dictionaries (``section_header``, ``source_start``,
        ``source_length``, ``target_start``, ``target_length``,
        ``lines``), and ``stats`` is ``[added, deleted]``.
    :raises Exception: when a "no newline" marker follows a line that
        affects neither the old nor the new side.
    """

    stats = [0, 0]
    chunks = []
    raw_diff = []

    diff_iter = imap(safe_unicode, diff_iter)

    try:
        line = next(diff_iter)

        while line:
            raw_diff.append(line)
            match = self._chunk_re.match(line)

            if not match:
                break

            gr = match.groups()
            # a missing length in the hunk header defaults to 1
            (old_line, old_end,
             new_line, new_end) = [int(x or 1) for x in gr[:-1]]

            lines = []
            hunk = {
                'section_header': gr[-1],
                'source_start': old_line,
                'source_length': old_end,
                'target_start': new_line,
                'target_length': new_end,
                'lines': lines,
            }
            chunks.append(hunk)

            # turn (start, length) into running counter and end position
            old_line -= 1
            new_line -= 1

            old_end += old_line
            new_end += new_line

            line = next(diff_iter)

            while old_line < old_end or new_line < new_end:
                command = ' '
                if line:
                    command = line[0]

                affects_old = affects_new = False

                # ignore those if we don't expect them
                if command in '#@':
                    # advance first; a bare `continue` would examine the
                    # same line again and never terminate
                    line = next(diff_iter)
                    continue
                elif command == '+':
                    affects_new = True
                    action = Action.ADD
                    stats[0] += 1
                elif command == '-':
                    affects_old = True
                    action = Action.DELETE
                    stats[1] += 1
                else:
                    affects_old = affects_new = True
                    action = Action.UNMODIFIED

                # the "no newline" marker is not counted in the hunk
                # line specs, so it must not move the line counters
                if not self._newline_marker.match(line):
                    old_line += affects_old
                    new_line += affects_new
                    lines.append({
                        'old_lineno': affects_old and old_line or '',
                        'new_lineno': affects_new and new_line or '',
                        'action': action,
                        'line': self._clean_line(line, command)
                    })
                    raw_diff.append(line)

                line = next(diff_iter)

                if self._newline_marker.match(line):
                    # we need to append to lines, since this is not
                    # counted in the line specs of diff; the side is
                    # taken from the line that was just processed
                    if affects_old:
                        action = Action.OLD_NO_NL
                    elif affects_new:
                        action = Action.NEW_NO_NL
                    else:
                        raise Exception('invalid context for no newline')

                    lines.append({
                        'old_lineno': None,
                        'new_lineno': None,
                        'action': action,
                        'line': self._clean_line(line, command)
                    })

    except StopIteration:
        # running out of input is the normal way parsing ends
        pass

    return ''.join(raw_diff), chunks, stats
807
807
808 def _safe_id(self, idstring):
808 def _safe_id(self, idstring):
809 """Make a string safe for including in an id attribute.
809 """Make a string safe for including in an id attribute.
810
810
811 The HTML spec says that id attributes 'must begin with
811 The HTML spec says that id attributes 'must begin with
812 a letter ([A-Za-z]) and may be followed by any number
812 a letter ([A-Za-z]) and may be followed by any number
813 of letters, digits ([0-9]), hyphens ("-"), underscores
813 of letters, digits ([0-9]), hyphens ("-"), underscores
814 ("_"), colons (":"), and periods (".")'. These regexps
814 ("_"), colons (":"), and periods (".")'. These regexps
815 are slightly over-zealous, in that they remove colons
815 are slightly over-zealous, in that they remove colons
816 and periods unnecessarily.
816 and periods unnecessarily.
817
817
818 Whitespace is transformed into underscores, and then
818 Whitespace is transformed into underscores, and then
819 anything which is not a hyphen or a character that
819 anything which is not a hyphen or a character that
820 matches \w (alphanumerics and underscore) is removed.
820 matches \w (alphanumerics and underscore) is removed.
821
821
822 """
822 """
823 # Transform all whitespace to underscore
823 # Transform all whitespace to underscore
824 idstring = re.sub(r'\s', "_", '%s' % idstring)
824 idstring = re.sub(r'\s', "_", '%s' % idstring)
825 # Remove everything that is not a hyphen or a member of \w
825 # Remove everything that is not a hyphen or a member of \w
826 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
826 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 return idstring
827 return idstring
828
828
def prepare(self, inline_diff=True):
    """
    Run the configured parser over the udiff for HTML rendering and
    remember the outcome on the instance.

    :param inline_diff: forwarded to ``self._parser`` as a keyword.
    :return: whatever ``self._parser`` returned; the same object is
        stored in ``self.parsed_diff`` and ``self.parsed`` is set.
    """
    result = self._parser(inline_diff=inline_diff)
    self.parsed, self.parsed_diff = True, result
    return result
839
839
def as_raw(self, diff_lines=None):
    """
    Return the raw diff as a byte string.

    :param diff_lines: not used by this implementation.
    :return: the ``raw`` attribute of the wrapped diff object.
    """
    raw = self._diff.raw
    return raw
845
845
def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    :param table_class: css class of the ``<table>`` element.
    :param line_class: css class put on every ``<tr>`` (the action of
        the line is appended as a second class).
    :param old_lineno_class: css class of the old line number cell.
    :param new_lineno_class: css class of the new line number cell.
    :param code_class: css class of the code cell.
    :param enable_comments: when true, non-context lines get the
        "add comment" icon and their code cell is not marked
        ``no-comment``.
    :param parsed_lines: optional already parsed diff; when given (and
        non-empty) it is rendered instead of ``self.parsed_diff``.
    :return: the html as a single string, or ``None`` when the diff
        contains no chunks at all.
    """
    # TODO(marcink): not sure how to pass in translator
    # here in an efficient way, leave the _ for proper gettext extraction
    _ = lambda s: s

    def _link_to_if(condition, label, url):
        """
        Generates a link if condition is met or just the label if not.
        """
        if not condition:
            return label
        return '''<a href="%(url)s" class="tooltip"
            title="%(title)s">%(label)s</a>''' % {
            'title': _('Click to select line'),
            'url': url,
            'label': label
        }

    if not self.parsed:
        self.prepare()

    diff_lines = parsed_lines if parsed_lines else self.parsed_diff

    html_empty = True
    html = ['''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    }]
    add = html.append

    for diff in diff_lines:
        for chunk in diff['chunks']:
            html_empty = False
            # The anchor prefix only depends on the file, compute it once
            # per chunk instead of twice for every rendered line.
            safe_name = self._safe_id(diff['filename'])

            for change in chunk:
                action = change['action']
                old_lineno = change['old_lineno']
                new_lineno = change['new_lineno']
                is_context = action == Action.CONTEXT

                add('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': action
                })

                anchor_old = "%s_o%s" % (safe_name, old_lineno)
                anchor_new = "%s_n%s" % (safe_name, new_lineno)

                # Only real line numbers get an id; the '...' placeholder
                # and empty values do not.
                anchor_old_id = ''
                anchor_new_id = ''
                if old_lineno != '...' and old_lineno:
                    anchor_old_id = 'id="%s"' % anchor_old
                if new_lineno != '...' and new_lineno:
                    anchor_new_id = 'id="%s"' % anchor_new

                # Context rows show plain labels, all other rows link to
                # their own anchor.
                anchor_link = not is_context

                ###########################################################
                # COMMENT ICONS
                ###########################################################
                add('''\t<td class="add-comment-line"><span class="add-comment-content">''')

                if enable_comments and not is_context:
                    add('''<a href="#"><span class="icon-comment-add"></span></a>''')

                add('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')

                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                add('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                add('''%(link)s''' % {
                    'link': _link_to_if(anchor_link, old_lineno,
                                        '#%s' % anchor_old)
                })
                add('''</td>\n''')

                ###########################################################
                # NEW LINE NUMBER
                ###########################################################
                add('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                add('''%(link)s''' % {
                    'link': _link_to_if(anchor_link, new_lineno,
                                        '#%s' % anchor_new)
                })
                add('''</td>\n''')

                ###########################################################
                # CODE
                ###########################################################
                code_classes = [code_class]
                if not enable_comments or is_context:
                    code_classes.append('no-comment')
                add('\t<td class="%s">' % ' '.join(code_classes))
                # The line content is inserted as-is, without escaping;
                # presumably the parser already produced safe markup
                # (TODO confirm against the parser).
                add('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                add('''\t</td>''')
                add('''\n</tr>\n''')

    add('''</table>''')
    if html_empty:
        return None
    return ''.join(html)
970
970
def stat(self):
    """
    Returns tuple of added, and removed lines for this instance

    :return: ``(self.adds, self.removes)``
    """
    return self.adds, self.removes
976
976
def get_context_of_line(
        self, path, diff_line=None, context_before=3, context_after=3):
    """
    Returns the context lines for the specified diff line.

    The line is located inside the parsed diff of `path`; up to
    `context_before` lines before it and `context_after` lines after it
    (within the same chunk) are returned together with the line itself.
    Only real diff content is returned, other entries of the chunk are
    filtered out.

    :param path: filename as stored in the parsed diff.
    :type diff_line: :class:`DiffLineNumber`
    :param diff_line: exactly one side may be set, i.e. it has to
        contain at least one `None`.
    :param context_before: number of lines to include before the line.
    :param context_after: number of lines to include after the line.
    :return: list of ``(action, line)`` tuples.
    :raises ValueError: if both line numbers are specified.
    """
    assert self.parsed, "DiffProcessor is not initialized."

    if None not in diff_line:
        raise ValueError(
            "Cannot specify both line numbers: {}".format(diff_line))

    file_diff = self._get_file_diff(path)
    chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

    # Clamp at the start of the chunk; slicing clamps at its end.
    first_line_to_include = max(idx - context_before, 0)
    first_line_after_context = idx + context_after + 1
    context_lines = chunk[first_line_to_include:first_line_after_context]

    line_contents = [
        _context_line(line) for line in context_lines
        if _is_diff_content(line)]
    # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
    # Once they are fixed, we can drop this line here.
    if line_contents:
        last_action, last_text = line_contents[-1]
        line_contents[-1] = (last_action, last_text.rstrip('\n') + '\n')
    return line_contents
1006
1006
def find_context(self, path, context, offset=0):
    """
    Finds the given `context` inside of the diff.

    Use the parameter `offset` to specify which offset the target line has
    inside of the given `context`. This way the correct diff line will be
    returned.

    Every chunk of the file is searched for runs of consecutive lines
    which are equal to `context`; all occurrences are reported.

    :param path: filename as stored in the parsed diff.
    :param context: sequence of ``(action, line)`` items to look for.
    :param offset: Shall be used to specify the offset of the main line
        within the given `context`.
    :return: list with the diff line number of the target line for each
        place where `context` was found, empty if it was not found.
    :raises ValueError: if `offset` does not point into `context`.
    """
    if offset < 0 or offset >= len(context):
        raise ValueError(
            "Only positive values up to the length of the context "
            "minus one are allowed.")

    wanted = list(context)
    window = len(wanted)
    file_diff = self._get_file_diff(path)

    found_at_diff_lines = []
    for chunk in file_diff['chunks']:
        chunk_lines = [_context_line(line) for line in chunk]
        # Compare every window of the chunk against the whole context.
        # A mismatch simply moves on to the next start position, so a
        # partial match can never be reported, and chunks shorter than
        # the context (including empty ones) produce no candidates.
        for start in range(len(chunk_lines) - window + 1):
            if chunk_lines[start:start + window] == wanted:
                found_at_diff_lines.append(
                    _line_to_diff_line_number(chunk[start + offset]))

    return found_at_diff_lines
1050
1050
1051 def _get_file_diff(self, path):
1051 def _get_file_diff(self, path):
1052 for file_diff in self.parsed_diff:
1052 for file_diff in self.parsed_diff:
1053 if file_diff['filename'] == path:
1053 if file_diff['filename'] == path:
1054 break
1054 break
1055 else:
1055 else:
1056 raise FileNotInDiffException("File {} not in diff".format(path))
1056 raise FileNotInDiffException("File {} not in diff".format(path))
1057 return file_diff
1057 return file_diff
1058
1058
1059 def _find_chunk_line_index(self, file_diff, diff_line):
1059 def _find_chunk_line_index(self, file_diff, diff_line):
1060 for chunk in file_diff['chunks']:
1060 for chunk in file_diff['chunks']:
1061 for idx, line in enumerate(chunk):
1061 for idx, line in enumerate(chunk):
1062 if line['old_lineno'] == diff_line.old:
1062 if line['old_lineno'] == diff_line.old:
1063 return chunk, idx
1063 return chunk, idx
1064 if line['new_lineno'] == diff_line.new:
1064 if line['new_lineno'] == diff_line.new:
1065 return chunk, idx
1065 return chunk, idx
1066 raise LineNotInDiffException(
1066 raise LineNotInDiffException(
1067 "The line {} is not part of the diff.".format(diff_line))
1067 "The line {} is not part of the diff.".format(diff_line))
1068
1068
1069
1069
1070 def _is_diff_content(line):
1070 def _is_diff_content(line):
1071 return line['action'] in (
1071 return line['action'] in (
1072 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1072 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1073
1073
1074
1074
1075 def _context_line(line):
1075 def _context_line(line):
1076 return (line['action'], line['line'])
1076 return (line['action'], line['line'])
1077
1077
1078
1078
# Position of a line inside a diff, given as old and new line number.
DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1080
1080
1081
1081
1082 def _line_to_diff_line_number(line):
1082 def _line_to_diff_line_number(line):
1083 new_line_no = line['new_lineno'] or None
1083 new_line_no = line['new_lineno'] or None
1084 old_line_no = line['old_lineno'] or None
1084 old_line_no = line['old_lineno'] or None
1085 return DiffLineNumber(old=old_line_no, new=new_line_no)
1085 return DiffLineNumber(old=old_line_no, new=new_line_no)
1086
1086
1087
1087
class FileNotInDiffException(Exception):
    """
    Raised when the context for a missing file is requested.

    If you request the context for a line in a file which is not part of the
    given diff, then this exception is raised.
    """
1095
1095
1096
1096
class LineNotInDiffException(Exception):
    """
    Raised when the context for a missing line is requested.

    If you request the context for a line in a file and this line is not
    part of the given diff, then this exception is raised.
    """
1104
1104
1105
1105
class DiffLimitExceeded(Exception):
    """
    Presumably raised when a diff is larger than a configured limit; the
    raise sites are outside of this section (TODO confirm).
    """
    pass
@@ -1,805 +1,813
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2017 RhodeCode GmbH
3 # Copyright (C) 2010-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import textwrap
21 import textwrap
22
22
23 import pytest
23 import pytest
24
24
25 from rhodecode.lib.diffs import (
25 from rhodecode.lib.diffs import (
26 DiffProcessor,
26 DiffProcessor,
27 NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
27 NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
28 CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
28 CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
29 from rhodecode.tests.fixture import Fixture
29 from rhodecode.tests.fixture import Fixture
30 from rhodecode.lib.vcs.backends.git.repository import GitDiff
30 from rhodecode.lib.vcs.backends.git.repository import GitDiff
31 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
31 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
32 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
32 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
33
33
# Shared helper instance, used below to load diff resources for the tests.
fixture = Fixture()
35
35
36
36
def test_diffprocessor_as_html_with_comments():
    """
    Render a small git diff with comments enabled and check the markup.
    """
    def _squash(text):
        # Collapse every run of whitespace into a single space. The exact
        # whitespace inside the expected markup could not be recovered, so
        # the comparison below is deliberately whitespace-insensitive.
        return ' '.join(text.split())

    # Context lines of a unified diff start with a single space, the
    # indentation of the changed file follows after that marker.
    raw_diff = textwrap.dedent('''
        diff --git a/setup.py b/setup.py
        index 5b36422..cfd698e 100755
        --- a/setup.py
        +++ b/setup.py
        @@ -2,7 +2,7 @@
         #!/usr/bin/python
         # Setup file for X
         # Copyright (C) No one
        -
        +x
         try:
             from setuptools import setup, Extension
         except ImportError:
    ''')
    diff = GitDiff(raw_diff)
    processor = DiffProcessor(diff)
    processor.prepare()

    # Note that the cell with the context in line 5 (in the html) has the
    # no-comment class, which will prevent the add comment icon to be displayed.
    expected_html = textwrap.dedent('''
        <table class="code-difftable">
        <tr class="line context">
         <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td  class="lineno old">...</td>
         <td  class="lineno new">...</td>
         <td class="code no-comment">
          <pre>@@ -2,7 +2,7 @@
        </pre>
         </td>
        </tr>
        <tr class="line unmod">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
            title="Click to select line">2</a></td>
         <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
            title="Click to select line">2</a></td>
         <td class="code">
          <pre>#!/usr/bin/python
        </pre>
         </td>
        </tr>
        <tr class="line unmod">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
            title="Click to select line">3</a></td>
         <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
            title="Click to select line">3</a></td>
         <td class="code">
          <pre># Setup file for X
        </pre>
         </td>
        </tr>
        <tr class="line unmod">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
            title="Click to select line">4</a></td>
         <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
            title="Click to select line">4</a></td>
         <td class="code">
          <pre># Copyright (C) No one
        </pre>
         </td>
        </tr>
        <tr class="line del">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
            title="Click to select line">5</a></td>
         <td  class="lineno new"><a href="#setuppy_n" class="tooltip"
            title="Click to select line"></a></td>
         <td class="code">
          <pre>
        </pre>
         </td>
        </tr>
        <tr class="line add">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td  class="lineno old"><a href="#setuppy_o" class="tooltip"
            title="Click to select line"></a></td>
         <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
            title="Click to select line">5</a></td>
         <td class="code">
          <pre><ins>x</ins>
        </pre>
         </td>
        </tr>
        <tr class="line unmod">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
            title="Click to select line">6</a></td>
         <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
            title="Click to select line">6</a></td>
         <td class="code">
          <pre>try:
        </pre>
         </td>
        </tr>
        <tr class="line unmod">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
            title="Click to select line">7</a></td>
         <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
            title="Click to select line">7</a></td>
         <td class="code">
          <pre>    from setuptools import setup, Extension
        </pre>
         </td>
        </tr>
        <tr class="line unmod">
         <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
         <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
            title="Click to select line">8</a></td>
         <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
            title="Click to select line">8</a></td>
         <td class="code">
          <pre>except ImportError:
        </pre>
         </td>
        </tr>
        </table>
    ''').strip()
    html = processor.as_html(enable_comments=True).replace('\t', ' ')

    assert _squash(html) == _squash(expected_html)
163
164
164
class TestMixedFilenameEncodings:
    """
    Checks around a Mercurial diff whose filenames use mixed encodings.

    The names which are not valid utf-8 are expected to come out with the
    unicode replacement character (U+FFFD) in place of the bad byte.
    """

    @pytest.fixture(scope="class")
    def raw_diff(self):
        return fixture.load_resource(
            'hg_diff_mixed_filename_encodings.diff')

    @pytest.fixture
    def processor(self, raw_diff):
        diff = MercurialDiff(raw_diff)
        processor = DiffProcessor(diff)
        return processor

    def test_filenames_are_decoded_to_unicode(self, processor):
        diff_data = processor.prepare()
        filenames = [item['filename'] for item in diff_data]
        assert filenames == [
            u'sp\xe4cial-utf8.txt',
            u'sp\ufffdcial-cp1252.txt',
            u'sp\ufffdcial-latin1.txt']

    def test_raw_diff_is_decoded_to_unicode(self, processor):
        diff_data = processor.prepare()
        raw_diffs = [item['raw_diff'] for item in diff_data]
        new_file_message = u'\nnew file mode 100644\n'
        expected_raw_diffs = [
            u' a/sp\xe4cial-utf8.txt b/sp\xe4cial-utf8.txt'
            + new_file_message,
            u' a/sp\ufffdcial-cp1252.txt b/sp\ufffdcial-cp1252.txt'
            + new_file_message,
            u' a/sp\ufffdcial-latin1.txt b/sp\ufffdcial-latin1.txt'
            + new_file_message]
        assert raw_diffs == expected_raw_diffs

    def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
        assert processor.as_raw() == raw_diff
196
197
197
198 # TODO: mikhail: format the following data structure properly
198 # TODO: mikhail: format the following data structure properly
199 DIFF_FIXTURES = [
199 DIFF_FIXTURES = [
200 ('hg',
200 ('hg',
201 'hg_diff_add_single_binary_file.diff',
201 'hg_diff_add_single_binary_file.diff',
202 [('US Warszawa.jpg', 'A',
202 [('US Warszawa.jpg', 'A',
203 {'added': 0,
203 {'added': 0,
204 'deleted': 0,
204 'deleted': 0,
205 'binary': True,
205 'binary': True,
206 'ops': {NEW_FILENODE: 'new file 100755',
206 'ops': {NEW_FILENODE: 'new file 100755',
207 BIN_FILENODE: 'binary diff hidden'}}),
207 BIN_FILENODE: 'binary diff hidden'}}),
208 ]),
208 ]),
209 ('hg',
209 ('hg',
210 'hg_diff_mod_single_binary_file.diff',
210 'hg_diff_mod_single_binary_file.diff',
211 [('US Warszawa.jpg', 'M',
211 [('US Warszawa.jpg', 'M',
212 {'added': 0,
212 {'added': 0,
213 'deleted': 0,
213 'deleted': 0,
214 'binary': True,
214 'binary': True,
215 'ops': {MOD_FILENODE: 'modified file',
215 'ops': {MOD_FILENODE: 'modified file',
216 BIN_FILENODE: 'binary diff hidden'}}),
216 BIN_FILENODE: 'binary diff hidden'}}),
217 ]),
217 ]),
218 ('hg',
218 ('hg',
219 'hg_diff_mod_single_file_and_rename_and_chmod.diff',
219 'hg_diff_mod_single_file_and_rename_and_chmod.diff',
220 [('README', 'M',
220 [('README', 'M',
221 {'added': 3,
221 {'added': 3,
222 'deleted': 0,
222 'deleted': 0,
223 'binary': False,
223 'binary': False,
224 'ops': {MOD_FILENODE: 'modified file',
224 'ops': {MOD_FILENODE: 'modified file',
225 RENAMED_FILENODE: 'file renamed from README.rst to README',
225 RENAMED_FILENODE: 'file renamed from README.rst to README',
226 CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
226 CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
227 ]),
227 ]),
228 ('hg',
228 ('hg',
229 'hg_diff_no_newline.diff',
230 [('server.properties', 'M',
231 {'added': 2,
232 'deleted': 1,
233 'binary': False,
234 'ops': {MOD_FILENODE: 'modified file'}}),
235 ]),
236 ('hg',
229 'hg_diff_mod_file_and_rename.diff',
237 'hg_diff_mod_file_and_rename.diff',
230 [('README.rst', 'M',
238 [('README.rst', 'M',
231 {'added': 3,
239 {'added': 3,
232 'deleted': 0,
240 'deleted': 0,
233 'binary': False,
241 'binary': False,
234 'ops': {MOD_FILENODE: 'modified file',
242 'ops': {MOD_FILENODE: 'modified file',
235 RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
243 RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
236 ]),
244 ]),
237 ('hg',
245 ('hg',
238 'hg_diff_del_single_binary_file.diff',
246 'hg_diff_del_single_binary_file.diff',
239 [('US Warszawa.jpg', 'D',
247 [('US Warszawa.jpg', 'D',
240 {'added': 0,
248 {'added': 0,
241 'deleted': 0,
249 'deleted': 0,
242 'binary': True,
250 'binary': True,
243 'ops': {DEL_FILENODE: 'deleted file',
251 'ops': {DEL_FILENODE: 'deleted file',
244 BIN_FILENODE: 'binary diff hidden'}}),
252 BIN_FILENODE: 'binary diff hidden'}}),
245 ]),
253 ]),
246 ('hg',
254 ('hg',
247 'hg_diff_chmod_and_mod_single_binary_file.diff',
255 'hg_diff_chmod_and_mod_single_binary_file.diff',
248 [('gravatar.png', 'M',
256 [('gravatar.png', 'M',
249 {'added': 0,
257 {'added': 0,
250 'deleted': 0,
258 'deleted': 0,
251 'binary': True,
259 'binary': True,
252 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
260 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
253 BIN_FILENODE: 'binary diff hidden'}}),
261 BIN_FILENODE: 'binary diff hidden'}}),
254 ]),
262 ]),
255 ('hg',
263 ('hg',
256 'hg_diff_chmod.diff',
264 'hg_diff_chmod.diff',
257 [('file', 'M',
265 [('file', 'M',
258 {'added': 0,
266 {'added': 0,
259 'deleted': 0,
267 'deleted': 0,
260 'binary': True,
268 'binary': True,
261 'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
269 'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
262 ]),
270 ]),
263 ('hg',
271 ('hg',
264 'hg_diff_rename_file.diff',
272 'hg_diff_rename_file.diff',
265 [('file_renamed', 'M',
273 [('file_renamed', 'M',
266 {'added': 0,
274 {'added': 0,
267 'deleted': 0,
275 'deleted': 0,
268 'binary': True,
276 'binary': True,
269 'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
277 'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
270 ]),
278 ]),
271 ('hg',
279 ('hg',
272 'hg_diff_rename_and_chmod_file.diff',
280 'hg_diff_rename_and_chmod_file.diff',
273 [('README', 'M',
281 [('README', 'M',
274 {'added': 0,
282 {'added': 0,
275 'deleted': 0,
283 'deleted': 0,
276 'binary': True,
284 'binary': True,
277 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
285 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
278 RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
286 RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
279 ]),
287 ]),
280 ('hg',
288 ('hg',
281 'hg_diff_binary_and_normal.diff',
289 'hg_diff_binary_and_normal.diff',
282 [('img/baseline-10px.png', 'A',
290 [('img/baseline-10px.png', 'A',
283 {'added': 0,
291 {'added': 0,
284 'deleted': 0,
292 'deleted': 0,
285 'binary': True,
293 'binary': True,
286 'ops': {NEW_FILENODE: 'new file 100644',
294 'ops': {NEW_FILENODE: 'new file 100644',
287 BIN_FILENODE: 'binary diff hidden'}}),
295 BIN_FILENODE: 'binary diff hidden'}}),
288 ('js/jquery/hashgrid.js', 'A',
296 ('js/jquery/hashgrid.js', 'A',
289 {'added': 340,
297 {'added': 340,
290 'deleted': 0,
298 'deleted': 0,
291 'binary': False,
299 'binary': False,
292 'ops': {NEW_FILENODE: 'new file 100755'}}),
300 'ops': {NEW_FILENODE: 'new file 100755'}}),
293 ('index.html', 'M',
301 ('index.html', 'M',
294 {'added': 3,
302 {'added': 3,
295 'deleted': 2,
303 'deleted': 2,
296 'binary': False,
304 'binary': False,
297 'ops': {MOD_FILENODE: 'modified file'}}),
305 'ops': {MOD_FILENODE: 'modified file'}}),
298 ('less/docs.less', 'M',
306 ('less/docs.less', 'M',
299 {'added': 34,
307 {'added': 34,
300 'deleted': 0,
308 'deleted': 0,
301 'binary': False,
309 'binary': False,
302 'ops': {MOD_FILENODE: 'modified file'}}),
310 'ops': {MOD_FILENODE: 'modified file'}}),
303 ('less/scaffolding.less', 'M',
311 ('less/scaffolding.less', 'M',
304 {'added': 1,
312 {'added': 1,
305 'deleted': 3,
313 'deleted': 3,
306 'binary': False,
314 'binary': False,
307 'ops': {MOD_FILENODE: 'modified file'}}),
315 'ops': {MOD_FILENODE: 'modified file'}}),
308 ('readme.markdown', 'M',
316 ('readme.markdown', 'M',
309 {'added': 1,
317 {'added': 1,
310 'deleted': 10,
318 'deleted': 10,
311 'binary': False,
319 'binary': False,
312 'ops': {MOD_FILENODE: 'modified file'}}),
320 'ops': {MOD_FILENODE: 'modified file'}}),
313 ('img/baseline-20px.png', 'D',
321 ('img/baseline-20px.png', 'D',
314 {'added': 0,
322 {'added': 0,
315 'deleted': 0,
323 'deleted': 0,
316 'binary': True,
324 'binary': True,
317 'ops': {DEL_FILENODE: 'deleted file',
325 'ops': {DEL_FILENODE: 'deleted file',
318 BIN_FILENODE: 'binary diff hidden'}}),
326 BIN_FILENODE: 'binary diff hidden'}}),
319 ('js/global.js', 'D',
327 ('js/global.js', 'D',
320 {'added': 0,
328 {'added': 0,
321 'deleted': 75,
329 'deleted': 75,
322 'binary': False,
330 'binary': False,
323 'ops': {DEL_FILENODE: 'deleted file'}})
331 'ops': {DEL_FILENODE: 'deleted file'}})
324 ]),
332 ]),
325 ('git',
333 ('git',
326 'git_diff_chmod.diff',
334 'git_diff_chmod.diff',
327 [('work-horus.xls', 'M',
335 [('work-horus.xls', 'M',
328 {'added': 0,
336 {'added': 0,
329 'deleted': 0,
337 'deleted': 0,
330 'binary': True,
338 'binary': True,
331 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
339 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
332 ]),
340 ]),
333 ('git',
341 ('git',
334 'git_diff_rename_file.diff',
342 'git_diff_rename_file.diff',
335 [('file.xls', 'M',
343 [('file.xls', 'M',
336 {'added': 0,
344 {'added': 0,
337 'deleted': 0,
345 'deleted': 0,
338 'binary': True,
346 'binary': True,
339 'ops': {
347 'ops': {
340 RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
348 RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
341 ]),
349 ]),
342 ('git',
350 ('git',
343 'git_diff_mod_single_binary_file.diff',
351 'git_diff_mod_single_binary_file.diff',
344 [('US Warszawa.jpg', 'M',
352 [('US Warszawa.jpg', 'M',
345 {'added': 0,
353 {'added': 0,
346 'deleted': 0,
354 'deleted': 0,
347 'binary': True,
355 'binary': True,
348 'ops': {MOD_FILENODE: 'modified file',
356 'ops': {MOD_FILENODE: 'modified file',
349 BIN_FILENODE: 'binary diff hidden'}})
357 BIN_FILENODE: 'binary diff hidden'}})
350 ]),
358 ]),
351 ('git',
359 ('git',
352 'git_diff_binary_and_normal.diff',
360 'git_diff_binary_and_normal.diff',
353 [('img/baseline-10px.png', 'A',
361 [('img/baseline-10px.png', 'A',
354 {'added': 0,
362 {'added': 0,
355 'deleted': 0,
363 'deleted': 0,
356 'binary': True,
364 'binary': True,
357 'ops': {NEW_FILENODE: 'new file 100644',
365 'ops': {NEW_FILENODE: 'new file 100644',
358 BIN_FILENODE: 'binary diff hidden'}}),
366 BIN_FILENODE: 'binary diff hidden'}}),
359 ('js/jquery/hashgrid.js', 'A',
367 ('js/jquery/hashgrid.js', 'A',
360 {'added': 340,
368 {'added': 340,
361 'deleted': 0,
369 'deleted': 0,
362 'binary': False,
370 'binary': False,
363 'ops': {NEW_FILENODE: 'new file 100755'}}),
371 'ops': {NEW_FILENODE: 'new file 100755'}}),
364 ('index.html', 'M',
372 ('index.html', 'M',
365 {'added': 3,
373 {'added': 3,
366 'deleted': 2,
374 'deleted': 2,
367 'binary': False,
375 'binary': False,
368 'ops': {MOD_FILENODE: 'modified file'}}),
376 'ops': {MOD_FILENODE: 'modified file'}}),
369 ('less/docs.less', 'M',
377 ('less/docs.less', 'M',
370 {'added': 34,
378 {'added': 34,
371 'deleted': 0,
379 'deleted': 0,
372 'binary': False,
380 'binary': False,
373 'ops': {MOD_FILENODE: 'modified file'}}),
381 'ops': {MOD_FILENODE: 'modified file'}}),
374 ('less/scaffolding.less', 'M',
382 ('less/scaffolding.less', 'M',
375 {'added': 1,
383 {'added': 1,
376 'deleted': 3,
384 'deleted': 3,
377 'binary': False,
385 'binary': False,
378 'ops': {MOD_FILENODE: 'modified file'}}),
386 'ops': {MOD_FILENODE: 'modified file'}}),
379 ('readme.markdown', 'M',
387 ('readme.markdown', 'M',
380 {'added': 1,
388 {'added': 1,
381 'deleted': 10,
389 'deleted': 10,
382 'binary': False,
390 'binary': False,
383 'ops': {MOD_FILENODE: 'modified file'}}),
391 'ops': {MOD_FILENODE: 'modified file'}}),
384 ('img/baseline-20px.png', 'D',
392 ('img/baseline-20px.png', 'D',
385 {'added': 0,
393 {'added': 0,
386 'deleted': 0,
394 'deleted': 0,
387 'binary': True,
395 'binary': True,
388 'ops': {DEL_FILENODE: 'deleted file',
396 'ops': {DEL_FILENODE: 'deleted file',
389 BIN_FILENODE: 'binary diff hidden'}}),
397 BIN_FILENODE: 'binary diff hidden'}}),
390 ('js/global.js', 'D',
398 ('js/global.js', 'D',
391 {'added': 0,
399 {'added': 0,
392 'deleted': 75,
400 'deleted': 75,
393 'binary': False,
401 'binary': False,
394 'ops': {DEL_FILENODE: 'deleted file'}}),
402 'ops': {DEL_FILENODE: 'deleted file'}}),
395 ]),
403 ]),
396 ('hg',
404 ('hg',
397 'diff_with_diff_data.diff',
405 'diff_with_diff_data.diff',
398 [('vcs/backends/base.py', 'M',
406 [('vcs/backends/base.py', 'M',
399 {'added': 18,
407 {'added': 18,
400 'deleted': 2,
408 'deleted': 2,
401 'binary': False,
409 'binary': False,
402 'ops': {MOD_FILENODE: 'modified file'}}),
410 'ops': {MOD_FILENODE: 'modified file'}}),
403 ('vcs/backends/git/repository.py', 'M',
411 ('vcs/backends/git/repository.py', 'M',
404 {'added': 46,
412 {'added': 46,
405 'deleted': 15,
413 'deleted': 15,
406 'binary': False,
414 'binary': False,
407 'ops': {MOD_FILENODE: 'modified file'}}),
415 'ops': {MOD_FILENODE: 'modified file'}}),
408 ('vcs/backends/hg.py', 'M',
416 ('vcs/backends/hg.py', 'M',
409 {'added': 22,
417 {'added': 22,
410 'deleted': 3,
418 'deleted': 3,
411 'binary': False,
419 'binary': False,
412 'ops': {MOD_FILENODE: 'modified file'}}),
420 'ops': {MOD_FILENODE: 'modified file'}}),
413 ('vcs/tests/test_git.py', 'M',
421 ('vcs/tests/test_git.py', 'M',
414 {'added': 5,
422 {'added': 5,
415 'deleted': 5,
423 'deleted': 5,
416 'binary': False,
424 'binary': False,
417 'ops': {MOD_FILENODE: 'modified file'}}),
425 'ops': {MOD_FILENODE: 'modified file'}}),
418 ('vcs/tests/test_repository.py', 'M',
426 ('vcs/tests/test_repository.py', 'M',
419 {'added': 174,
427 {'added': 174,
420 'deleted': 2,
428 'deleted': 2,
421 'binary': False,
429 'binary': False,
422 'ops': {MOD_FILENODE: 'modified file'}}),
430 'ops': {MOD_FILENODE: 'modified file'}}),
423 ]),
431 ]),
424 ('hg',
432 ('hg',
425 'hg_diff_copy_file.diff',
433 'hg_diff_copy_file.diff',
426 [('file2', 'M',
434 [('file2', 'M',
427 {'added': 0,
435 {'added': 0,
428 'deleted': 0,
436 'deleted': 0,
429 'binary': True,
437 'binary': True,
430 'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
438 'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
431 ]),
439 ]),
432 ('hg',
440 ('hg',
433 'hg_diff_copy_and_modify_file.diff',
441 'hg_diff_copy_and_modify_file.diff',
434 [('file3', 'M',
442 [('file3', 'M',
435 {'added': 1,
443 {'added': 1,
436 'deleted': 0,
444 'deleted': 0,
437 'binary': False,
445 'binary': False,
438 'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
446 'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
439 MOD_FILENODE: 'modified file'}}),
447 MOD_FILENODE: 'modified file'}}),
440 ]),
448 ]),
441 ('hg',
449 ('hg',
442 'hg_diff_copy_and_chmod_file.diff',
450 'hg_diff_copy_and_chmod_file.diff',
443 [('file4', 'M',
451 [('file4', 'M',
444 {'added': 0,
452 {'added': 0,
445 'deleted': 0,
453 'deleted': 0,
446 'binary': True,
454 'binary': True,
447 'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
455 'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
448 CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
456 CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
449 ]),
457 ]),
450 ('hg',
458 ('hg',
451 'hg_diff_copy_chmod_and_edit_file.diff',
459 'hg_diff_copy_chmod_and_edit_file.diff',
452 [('file5', 'M',
460 [('file5', 'M',
453 {'added': 2,
461 {'added': 2,
454 'deleted': 1,
462 'deleted': 1,
455 'binary': False,
463 'binary': False,
456 'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
464 'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
457 CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
465 CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
458 MOD_FILENODE: 'modified file'}})]),
466 MOD_FILENODE: 'modified file'}})]),
459
467
460 # Diffs to validate rename and copy file with space in its name
468 # Diffs to validate rename and copy file with space in its name
461 ('git',
469 ('git',
462 'git_diff_rename_file_with_spaces.diff',
470 'git_diff_rename_file_with_spaces.diff',
463 [('file_with_ two spaces.txt', 'M',
471 [('file_with_ two spaces.txt', 'M',
464 {'added': 0,
472 {'added': 0,
465 'deleted': 0,
473 'deleted': 0,
466 'binary': True,
474 'binary': True,
467 'ops': {
475 'ops': {
468 RENAMED_FILENODE: (
476 RENAMED_FILENODE: (
469 'file renamed from file_with_ spaces.txt to file_with_ '
477 'file renamed from file_with_ spaces.txt to file_with_ '
470 ' two spaces.txt')}
478 ' two spaces.txt')}
471 }), ]),
479 }), ]),
472 ('hg',
480 ('hg',
473 'hg_diff_rename_file_with_spaces.diff',
481 'hg_diff_rename_file_with_spaces.diff',
474 [('file_changed _.txt', 'M',
482 [('file_changed _.txt', 'M',
475 {'added': 0,
483 {'added': 0,
476 'deleted': 0,
484 'deleted': 0,
477 'binary': True,
485 'binary': True,
478 'ops': {
486 'ops': {
479 RENAMED_FILENODE: (
487 RENAMED_FILENODE: (
480 'file renamed from file_ with update.txt to file_changed'
488 'file renamed from file_ with update.txt to file_changed'
481 ' _.txt')}
489 ' _.txt')}
482 }), ]),
490 }), ]),
483 ('hg',
491 ('hg',
484 'hg_diff_copy_file_with_spaces.diff',
492 'hg_diff_copy_file_with_spaces.diff',
485 [('file_copied_ with spaces.txt', 'M',
493 [('file_copied_ with spaces.txt', 'M',
486 {'added': 0,
494 {'added': 0,
487 'deleted': 0,
495 'deleted': 0,
488 'binary': True,
496 'binary': True,
489 'ops': {
497 'ops': {
490 COPIED_FILENODE: (
498 COPIED_FILENODE: (
491 'file copied from file_changed_without_spaces.txt to'
499 'file copied from file_changed_without_spaces.txt to'
492 ' file_copied_ with spaces.txt')}
500 ' file_copied_ with spaces.txt')}
493 }),
501 }),
494 ]),
502 ]),
495
503
496 # special signs from git
504 # special signs from git
497 ('git',
505 ('git',
498 'git_diff_binary_special_files.diff',
506 'git_diff_binary_special_files.diff',
499 [('css/_Icon\\r', 'A',
507 [('css/_Icon\\r', 'A',
500 {'added': 0,
508 {'added': 0,
501 'deleted': 0,
509 'deleted': 0,
502 'binary': True,
510 'binary': True,
503 'ops': {NEW_FILENODE: 'new file 100644',
511 'ops': {NEW_FILENODE: 'new file 100644',
504 BIN_FILENODE: 'binary diff hidden'}
512 BIN_FILENODE: 'binary diff hidden'}
505 }),
513 }),
506 ]),
514 ]),
507 ('git',
515 ('git',
508 'git_diff_binary_special_files_2.diff',
516 'git_diff_binary_special_files_2.diff',
509 [('css/Icon\\r', 'A',
517 [('css/Icon\\r', 'A',
510 {'added': 0,
518 {'added': 0,
511 'deleted': 0,
519 'deleted': 0,
512 'binary': True,
520 'binary': True,
513 'ops': {NEW_FILENODE: 'new file 100644', }
521 'ops': {NEW_FILENODE: 'new file 100644', }
514 }),
522 }),
515 ]),
523 ]),
516
524
517 ('svn',
525 ('svn',
518 'svn_diff_binary_add_file.diff',
526 'svn_diff_binary_add_file.diff',
519 [('intl.dll', 'A',
527 [('intl.dll', 'A',
520 {'added': 0,
528 {'added': 0,
521 'deleted': 0,
529 'deleted': 0,
522 'binary': False,
530 'binary': False,
523 'ops': {NEW_FILENODE: 'new file 10644',
531 'ops': {NEW_FILENODE: 'new file 10644',
524 #TODO(Marcink): depends on binary detection on svn patches
532 #TODO(Marcink): depends on binary detection on svn patches
525 # BIN_FILENODE: 'binary diff hidden'
533 # BIN_FILENODE: 'binary diff hidden'
526 }
534 }
527 }),
535 }),
528 ]),
536 ]),
529
537
530 ('svn',
538 ('svn',
531 'svn_diff_multiple_changes.diff',
539 'svn_diff_multiple_changes.diff',
532 [('trunk/doc/images/SettingsOverlay.png', 'M',
540 [('trunk/doc/images/SettingsOverlay.png', 'M',
533 {'added': 0,
541 {'added': 0,
534 'deleted': 0,
542 'deleted': 0,
535 'binary': False,
543 'binary': False,
536 'ops': {MOD_FILENODE: 'modified file',
544 'ops': {MOD_FILENODE: 'modified file',
537 #TODO(Marcink): depends on binary detection on svn patches
545 #TODO(Marcink): depends on binary detection on svn patches
538 # BIN_FILENODE: 'binary diff hidden'
546 # BIN_FILENODE: 'binary diff hidden'
539 }
547 }
540 }),
548 }),
541 ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
549 ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
542 {'added': 89,
550 {'added': 89,
543 'deleted': 34,
551 'deleted': 34,
544 'binary': False,
552 'binary': False,
545 'ops': {MOD_FILENODE: 'modified file'}
553 'ops': {MOD_FILENODE: 'modified file'}
546 }),
554 }),
547 ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
555 ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
548 {'added': 66,
556 {'added': 66,
549 'deleted': 21,
557 'deleted': 21,
550 'binary': False,
558 'binary': False,
551 'ops': {MOD_FILENODE: 'modified file'}
559 'ops': {MOD_FILENODE: 'modified file'}
552 }),
560 }),
553 ('trunk/src/Changelog.txt', 'M',
561 ('trunk/src/Changelog.txt', 'M',
554 {'added': 2,
562 {'added': 2,
555 'deleted': 0,
563 'deleted': 0,
556 'binary': False,
564 'binary': False,
557 'ops': {MOD_FILENODE: 'modified file'}
565 'ops': {MOD_FILENODE: 'modified file'}
558 }),
566 }),
559 ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
567 ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
560 {'added': 19,
568 {'added': 19,
561 'deleted': 13,
569 'deleted': 13,
562 'binary': False,
570 'binary': False,
563 'ops': {MOD_FILENODE: 'modified file'}
571 'ops': {MOD_FILENODE: 'modified file'}
564 }),
572 }),
565 ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
573 ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
566 {'added': 16,
574 {'added': 16,
567 'deleted': 1,
575 'deleted': 1,
568 'binary': False,
576 'binary': False,
569 'ops': {MOD_FILENODE: 'modified file'}
577 'ops': {MOD_FILENODE: 'modified file'}
570 }),
578 }),
571 ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
579 ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
572 {'added': 3,
580 {'added': 3,
573 'deleted': 0,
581 'deleted': 0,
574 'binary': False,
582 'binary': False,
575 'ops': {MOD_FILENODE: 'modified file'}
583 'ops': {MOD_FILENODE: 'modified file'}
576 }),
584 }),
577 ('trunk/src/TortoiseProc/resource.h', 'M',
585 ('trunk/src/TortoiseProc/resource.h', 'M',
578 {'added': 2,
586 {'added': 2,
579 'deleted': 0,
587 'deleted': 0,
580 'binary': False,
588 'binary': False,
581 'ops': {MOD_FILENODE: 'modified file'}
589 'ops': {MOD_FILENODE: 'modified file'}
582 }),
590 }),
583 ('trunk/src/TortoiseShell/ShellCache.h', 'M',
591 ('trunk/src/TortoiseShell/ShellCache.h', 'M',
584 {'added': 50,
592 {'added': 50,
585 'deleted': 1,
593 'deleted': 1,
586 'binary': False,
594 'binary': False,
587 'ops': {MOD_FILENODE: 'modified file'}
595 'ops': {MOD_FILENODE: 'modified file'}
588 }),
596 }),
589 ]),
597 ]),
590
598
591
599
592 # TODO: mikhail: do we still need this?
600 # TODO: mikhail: do we still need this?
593 # (
601 # (
594 # 'hg',
602 # 'hg',
595 # 'large_diff.diff',
603 # 'large_diff.diff',
596 # [
604 # [
597 # ('.hgignore', 'A', {
605 # ('.hgignore', 'A', {
598 # 'deleted': 0, 'binary': False, 'added': 3, 'ops': {
606 # 'deleted': 0, 'binary': False, 'added': 3, 'ops': {
599 # 1: 'new file 100644'}}),
607 # 1: 'new file 100644'}}),
600 # (
608 # (
601 # 'MANIFEST.in', 'A',
609 # 'MANIFEST.in', 'A',
602 # {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
610 # {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
603 # 1: 'new file 100644'}}),
611 # 1: 'new file 100644'}}),
604 # (
612 # (
605 # 'README.txt', 'A',
613 # 'README.txt', 'A',
606 # {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
614 # {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
607 # 1: 'new file 100644'}}),
615 # 1: 'new file 100644'}}),
608 # (
616 # (
609 # 'development.ini', 'A', {
617 # 'development.ini', 'A', {
610 # 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
618 # 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
611 # 1: 'new file 100644'}}),
619 # 1: 'new file 100644'}}),
612 # (
620 # (
613 # 'docs/index.txt', 'A', {
621 # 'docs/index.txt', 'A', {
614 # 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
622 # 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
615 # 1: 'new file 100644'}}),
623 # 1: 'new file 100644'}}),
616 # (
624 # (
617 # 'ez_setup.py', 'A', {
625 # 'ez_setup.py', 'A', {
618 # 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
626 # 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
619 # 1: 'new file 100644'}}),
627 # 1: 'new file 100644'}}),
620 # (
628 # (
621 # 'hgapp.py', 'A', {
629 # 'hgapp.py', 'A', {
622 # 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
630 # 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
623 # 1: 'new file 100644'}}),
631 # 1: 'new file 100644'}}),
624 # (
632 # (
625 # 'hgwebdir.config', 'A', {
633 # 'hgwebdir.config', 'A', {
626 # 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
634 # 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
627 # 1: 'new file 100644'}}),
635 # 1: 'new file 100644'}}),
628 # (
636 # (
629 # 'pylons_app.egg-info/PKG-INFO', 'A', {
637 # 'pylons_app.egg-info/PKG-INFO', 'A', {
630 # 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
638 # 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
631 # 1: 'new file 100644'}}),
639 # 1: 'new file 100644'}}),
632 # (
640 # (
633 # 'pylons_app.egg-info/SOURCES.txt', 'A', {
641 # 'pylons_app.egg-info/SOURCES.txt', 'A', {
634 # 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
642 # 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
635 # 1: 'new file 100644'}}),
643 # 1: 'new file 100644'}}),
636 # (
644 # (
637 # 'pylons_app.egg-info/dependency_links.txt', 'A', {
645 # 'pylons_app.egg-info/dependency_links.txt', 'A', {
638 # 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
646 # 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
639 # 1: 'new file 100644'}}),
647 # 1: 'new file 100644'}}),
640 # ]
648 # ]
641 # ),
649 # ),
642 ]
650 ]
643
651
644 DIFF_FIXTURES_WITH_CONTENT = [
652 DIFF_FIXTURES_WITH_CONTENT = [
645 (
653 (
646 'hg', 'hg_diff_single_file_change_newline.diff',
654 'hg', 'hg_diff_single_file_change_newline.diff',
647 [
655 [
648 (
656 (
649 'file_b', # filename
657 'file_b', # filename
650 'A', # change
658 'A', # change
651 { # stats
659 { # stats
652 'added': 1,
660 'added': 1,
653 'deleted': 0,
661 'deleted': 0,
654 'binary': False,
662 'binary': False,
655 'ops': {NEW_FILENODE: 'new file 100644', }
663 'ops': {NEW_FILENODE: 'new file 100644', }
656 },
664 },
657 '@@ -0,0 +1 @@\n+test_content b\n' # diff
665 '@@ -0,0 +1 @@\n+test_content b\n' # diff
658 ),
666 ),
659 ],
667 ],
660 ),
668 ),
661 (
669 (
662 'hg', 'hg_diff_double_file_change_newline.diff',
670 'hg', 'hg_diff_double_file_change_newline.diff',
663 [
671 [
664 (
672 (
665 'file_b', # filename
673 'file_b', # filename
666 'A', # change
674 'A', # change
667 { # stats
675 { # stats
668 'added': 1,
676 'added': 1,
669 'deleted': 0,
677 'deleted': 0,
670 'binary': False,
678 'binary': False,
671 'ops': {NEW_FILENODE: 'new file 100644', }
679 'ops': {NEW_FILENODE: 'new file 100644', }
672 },
680 },
673 '@@ -0,0 +1 @@\n+test_content b\n' # diff
681 '@@ -0,0 +1 @@\n+test_content b\n' # diff
674 ),
682 ),
675 (
683 (
676 'file_c', # filename
684 'file_c', # filename
677 'A', # change
685 'A', # change
678 { # stats
686 { # stats
679 'added': 1,
687 'added': 1,
680 'deleted': 0,
688 'deleted': 0,
681 'binary': False,
689 'binary': False,
682 'ops': {NEW_FILENODE: 'new file 100644', }
690 'ops': {NEW_FILENODE: 'new file 100644', }
683 },
691 },
684 '@@ -0,0 +1 @@\n+test_content c\n' # diff
692 '@@ -0,0 +1 @@\n+test_content c\n' # diff
685 ),
693 ),
686 ],
694 ],
687 ),
695 ),
688 (
696 (
689 'hg', 'hg_diff_double_file_change_double_newline.diff',
697 'hg', 'hg_diff_double_file_change_double_newline.diff',
690 [
698 [
691 (
699 (
692 'file_b', # filename
700 'file_b', # filename
693 'A', # change
701 'A', # change
694 { # stats
702 { # stats
695 'added': 1,
703 'added': 1,
696 'deleted': 0,
704 'deleted': 0,
697 'binary': False,
705 'binary': False,
698 'ops': {NEW_FILENODE: 'new file 100644', }
706 'ops': {NEW_FILENODE: 'new file 100644', }
699 },
707 },
700 '@@ -0,0 +1 @@\n+test_content b\n\n' # diff
708 '@@ -0,0 +1 @@\n+test_content b\n\n' # diff
701 ),
709 ),
702 (
710 (
703 'file_c', # filename
711 'file_c', # filename
704 'A', # change
712 'A', # change
705 { # stats
713 { # stats
706 'added': 1,
714 'added': 1,
707 'deleted': 0,
715 'deleted': 0,
708 'binary': False,
716 'binary': False,
709 'ops': {NEW_FILENODE: 'new file 100644', }
717 'ops': {NEW_FILENODE: 'new file 100644', }
710 },
718 },
711 '@@ -0,0 +1 @@\n+test_content c\n' # diff
719 '@@ -0,0 +1 @@\n+test_content c\n' # diff
712 ),
720 ),
713 ],
721 ],
714 ),
722 ),
715 (
723 (
716 'hg', 'hg_diff_four_file_change_newline.diff',
724 'hg', 'hg_diff_four_file_change_newline.diff',
717 [
725 [
718 (
726 (
719 'file', # filename
727 'file', # filename
720 'A', # change
728 'A', # change
721 { # stats
729 { # stats
722 'added': 1,
730 'added': 1,
723 'deleted': 0,
731 'deleted': 0,
724 'binary': False,
732 'binary': False,
725 'ops': {NEW_FILENODE: 'new file 100644', }
733 'ops': {NEW_FILENODE: 'new file 100644', }
726 },
734 },
727 '@@ -0,0 +1,1 @@\n+file\n' # diff
735 '@@ -0,0 +1,1 @@\n+file\n' # diff
728 ),
736 ),
729 (
737 (
730 'file2', # filename
738 'file2', # filename
731 'A', # change
739 'A', # change
732 { # stats
740 { # stats
733 'added': 1,
741 'added': 1,
734 'deleted': 0,
742 'deleted': 0,
735 'binary': False,
743 'binary': False,
736 'ops': {NEW_FILENODE: 'new file 100644', }
744 'ops': {NEW_FILENODE: 'new file 100644', }
737 },
745 },
738 '@@ -0,0 +1,1 @@\n+another line\n' # diff
746 '@@ -0,0 +1,1 @@\n+another line\n' # diff
739 ),
747 ),
740 (
748 (
741 'file3', # filename
749 'file3', # filename
742 'A', # change
750 'A', # change
743 { # stats
751 { # stats
744 'added': 1,
752 'added': 1,
745 'deleted': 0,
753 'deleted': 0,
746 'binary': False,
754 'binary': False,
747 'ops': {NEW_FILENODE: 'new file 100644', }
755 'ops': {NEW_FILENODE: 'new file 100644', }
748 },
756 },
749 '@@ -0,0 +1,1 @@\n+newline\n' # diff
757 '@@ -0,0 +1,1 @@\n+newline\n' # diff
750 ),
758 ),
751 (
759 (
752 'file4', # filename
760 'file4', # filename
753 'A', # change
761 'A', # change
754 { # stats
762 { # stats
755 'added': 1,
763 'added': 1,
756 'deleted': 0,
764 'deleted': 0,
757 'binary': False,
765 'binary': False,
758 'ops': {NEW_FILENODE: 'new file 100644', }
766 'ops': {NEW_FILENODE: 'new file 100644', }
759 },
767 },
760 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
768 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
761 ),
769 ),
762 ],
770 ],
763 ),
771 ),
764
772
765 ]
773 ]
766
774
767
775
768 diff_class = {
776 diff_class = {
769 'git': GitDiff,
777 'git': GitDiff,
770 'hg': MercurialDiff,
778 'hg': MercurialDiff,
771 'svn': SubversionDiff,
779 'svn': SubversionDiff,
772 }
780 }
773
781
774
782
775 @pytest.fixture(params=DIFF_FIXTURES)
783 @pytest.fixture(params=DIFF_FIXTURES)
776 def diff_fixture(request):
784 def diff_fixture(request):
777 vcs, diff_fixture, expected = request.param
785 vcs, diff_fixture, expected = request.param
778 diff_txt = fixture.load_resource(diff_fixture)
786 diff_txt = fixture.load_resource(diff_fixture)
779 diff = diff_class[vcs](diff_txt)
787 diff = diff_class[vcs](diff_txt)
780 return diff, expected
788 return diff, expected
781
789
782
790
783 def test_diff_lib(diff_fixture):
791 def test_diff_lib(diff_fixture):
784 diff, expected_data = diff_fixture
792 diff, expected_data = diff_fixture
785 diff_proc = DiffProcessor(diff)
793 diff_proc = DiffProcessor(diff)
786 diff_proc_d = diff_proc.prepare()
794 diff_proc_d = diff_proc.prepare()
787 data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
795 data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
788 assert expected_data == data
796 assert expected_data == data
789
797
790
798
791 @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
799 @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
792 def diff_fixture_w_content(request):
800 def diff_fixture_w_content(request):
793 vcs, diff_fixture, expected = request.param
801 vcs, diff_fixture, expected = request.param
794 diff_txt = fixture.load_resource(diff_fixture)
802 diff_txt = fixture.load_resource(diff_fixture)
795 diff = diff_class[vcs](diff_txt)
803 diff = diff_class[vcs](diff_txt)
796 return diff, expected
804 return diff, expected
797
805
798
806
799 def test_diff_lib_newlines(diff_fixture_w_content):
807 def test_diff_lib_newlines(diff_fixture_w_content):
800 diff, expected_data = diff_fixture_w_content
808 diff, expected_data = diff_fixture_w_content
801 diff_proc = DiffProcessor(diff)
809 diff_proc = DiffProcessor(diff)
802 diff_proc_d = diff_proc.prepare()
810 diff_proc_d = diff_proc.prepare()
803 data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
811 data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
804 for x in diff_proc_d]
812 for x in diff_proc_d]
805 assert expected_data == data
813 assert expected_data == data
General Comments 0
You need to be logged in to leave comments. Login now