diffs: in case of text lexers don't do any HL because of pygments newline...
marcink
r2546:db577a02 stable
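For context, a minimal sketch of the pygments behavior this commit works around (assuming a stock pygments install): with default options (`ensurenl=True`), `Lexer.get_tokens` guarantees the input ends with a newline, so tokenizing a plain-text blob and re-joining the token texts does not round-trip exactly. The new `tokenize_string` below therefore skips `lex()` entirely for `TextLexer` and emits the raw content as a single `Token.Text` token.

```python
from pygments import lex
from pygments.lexers.special import TextLexer
from pygments.token import Token

content = 'last line has no trailing newline'

# With default options pygments appends '\n' (ensurenl=True), so the
# joined token text no longer matches the original content exactly.
tokens = list(lex(content, TextLexer()))
assert ''.join(text for _, text in tokens) == content + '\n'

# The workaround mirrored by this commit: for plain text, skip lexing
# and emit one Text token holding the raw, unmodified content.
tokens = [(Token.Text, content)]
assert ''.join(text for _, text in tokens) == content
```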
@@ -1,735 +1,743 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2018 RhodeCode GmbH
3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 import difflib
23 from itertools import groupby
23 from itertools import groupby
24
24
25 from pygments import lex
25 from pygments import lex
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from pygments.lexers.special import TextLexer, Token
28
27 from rhodecode.lib.helpers import (
29 from rhodecode.lib.helpers import (
28 get_lexer_for_filenode, html_escape, get_custom_lexer)
30 get_lexer_for_filenode, html_escape, get_custom_lexer)
29 from rhodecode.lib.utils2 import AttributeDict
31 from rhodecode.lib.utils2 import AttributeDict
30 from rhodecode.lib.vcs.nodes import FileNode
32 from rhodecode.lib.vcs.nodes import FileNode
31 from rhodecode.lib.diff_match_patch import diff_match_patch
33 from rhodecode.lib.diff_match_patch import diff_match_patch
32 from rhodecode.lib.diffs import LimitedDiffContainer
34 from rhodecode.lib.diffs import LimitedDiffContainer
33 from pygments.lexers import get_lexer_by_name
35 from pygments.lexers import get_lexer_by_name
34
36
35 plain_text_lexer = get_lexer_by_name(
37 plain_text_lexer = get_lexer_by_name(
36 'text', stripall=False, stripnl=False, ensurenl=False)
38 'text', stripall=False, stripnl=False, ensurenl=False)
37
39
38
40
39 log = logging.getLogger(__name__)
41 log = logging.getLogger(__name__)
40
42
41
43
42 def filenode_as_lines_tokens(filenode, lexer=None):
44 def filenode_as_lines_tokens(filenode, lexer=None):
43 org_lexer = lexer
45 org_lexer = lexer
44 lexer = lexer or get_lexer_for_filenode(filenode)
46 lexer = lexer or get_lexer_for_filenode(filenode)
45 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
47 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
46 lexer, filenode, org_lexer)
48 lexer, filenode, org_lexer)
47 tokens = tokenize_string(filenode.content, lexer)
49 tokens = tokenize_string(filenode.content, lexer)
48 lines = split_token_stream(tokens, split_string='\n')
50 lines = split_token_stream(tokens)
49 rv = list(lines)
51 rv = list(lines)
50 return rv
52 return rv
51
53
52
54
53 def tokenize_string(content, lexer):
55 def tokenize_string(content, lexer):
54 """
56 """
55 Use pygments to tokenize some content based on a lexer
57 Use pygments to tokenize some content based on a lexer
56 ensuring all original newlines and whitespace are preserved
58 ensuring all original newlines and whitespace are preserved
57 """
59 """
58
60
59 lexer.stripall = False
61 lexer.stripall = False
60 lexer.stripnl = False
62 lexer.stripnl = False
61 lexer.ensurenl = False
63 lexer.ensurenl = False
62 for token_type, token_text in lex(content, lexer):
64
65 if isinstance(lexer, TextLexer):
66 lexed = [(Token.Text, content)]
67 else:
68 lexed = lex(content, lexer)
69
70 for token_type, token_text in lexed:
63 yield pygment_token_class(token_type), token_text
71 yield pygment_token_class(token_type), token_text
64
72
65
73
66 def split_token_stream(tokens, split_string=u'\n'):
74 def split_token_stream(tokens):
67 """
75 """
68 Take a list of (TokenType, text) tuples and split them by a string
76 Take a list of (TokenType, text) tuples and split them by newlines
69
77
70 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
78 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
71 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
79 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
72 [(TEXT, '')]]
80 [(TEXT, '')]]
73 """
81 """
74
82
75 buffer = []
83 buffer = []
76 for token_class, token_text in tokens:
84 for token_class, token_text in tokens:
77 parts = token_text.split(split_string)
85 parts = token_text.split('\n')
78 for part in parts[:-1]:
86 for part in parts[:-1]:
79 buffer.append((token_class, part))
87 buffer.append((token_class, part))
80 yield buffer
88 yield buffer
81 buffer = []
89 buffer = []
82
90
83 buffer.append((token_class, parts[-1]))
91 buffer.append((token_class, parts[-1]))
84
92
85 if buffer:
93 if buffer:
86 yield buffer
94 yield buffer
87
95
88
96
89 def filenode_as_annotated_lines_tokens(filenode):
97 def filenode_as_annotated_lines_tokens(filenode):
90 """
98 """
91 Take a file node and return a list of annotation => lines pairs; if no
99 Take a file node and return a list of annotation => lines pairs; if no
92 annotation is found, the annotation will be None.
100 annotation is found, the annotation will be None.
93
101
94 eg:
102 eg:
95
103
96 [
104 [
97 (annotation1, [
105 (annotation1, [
98 (1, line1_tokens_list),
106 (1, line1_tokens_list),
99 (2, line2_tokens_list),
107 (2, line2_tokens_list),
100 ]),
108 ]),
101 (annotation2, [
109 (annotation2, [
102 (3, line1_tokens_list),
110 (3, line1_tokens_list),
103 ]),
111 ]),
104 (None, [
112 (None, [
105 (4, line1_tokens_list),
113 (4, line1_tokens_list),
106 ]),
114 ]),
107 (annotation1, [
115 (annotation1, [
108 (5, line1_tokens_list),
116 (5, line1_tokens_list),
109 (6, line2_tokens_list),
117 (6, line2_tokens_list),
110 ])
118 ])
111 ]
119 ]
112 """
120 """
113
121
114 commit_cache = {} # cache commit_getter lookups
122 commit_cache = {} # cache commit_getter lookups
115
123
116 def _get_annotation(commit_id, commit_getter):
124 def _get_annotation(commit_id, commit_getter):
117 if commit_id not in commit_cache:
125 if commit_id not in commit_cache:
118 commit_cache[commit_id] = commit_getter()
126 commit_cache[commit_id] = commit_getter()
119 return commit_cache[commit_id]
127 return commit_cache[commit_id]
120
128
121 annotation_lookup = {
129 annotation_lookup = {
122 line_no: _get_annotation(commit_id, commit_getter)
130 line_no: _get_annotation(commit_id, commit_getter)
123 for line_no, commit_id, commit_getter, line_content
131 for line_no, commit_id, commit_getter, line_content
124 in filenode.annotate
132 in filenode.annotate
125 }
133 }
126
134
127 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
135 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
128 for line_no, tokens
136 for line_no, tokens
129 in enumerate(filenode_as_lines_tokens(filenode), 1))
137 in enumerate(filenode_as_lines_tokens(filenode), 1))
130
138
131 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
139 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
132
140
133 for annotation, group in grouped_annotations_lines:
141 for annotation, group in grouped_annotations_lines:
134 yield (
142 yield (
135 annotation, [(line_no, tokens)
143 annotation, [(line_no, tokens)
136 for (_, line_no, tokens) in group]
144 for (_, line_no, tokens) in group]
137 )
145 )
138
146
139
147
140 def render_tokenstream(tokenstream):
148 def render_tokenstream(tokenstream):
141 result = []
149 result = []
142 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
150 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
143
151
144 if token_class:
152 if token_class:
145 result.append(u'<span class="%s">' % token_class)
153 result.append(u'<span class="%s">' % token_class)
146 else:
154 else:
147 result.append(u'<span>')
155 result.append(u'<span>')
148
156
149 for op_tag, token_text in token_ops_texts:
157 for op_tag, token_text in token_ops_texts:
150
158
151 if op_tag:
159 if op_tag:
152 result.append(u'<%s>' % op_tag)
160 result.append(u'<%s>' % op_tag)
153
161
154 escaped_text = html_escape(token_text)
162 escaped_text = html_escape(token_text)
155
163
156 # TODO: dan: investigate showing hidden characters like space/nl/tab
164 # TODO: dan: investigate showing hidden characters like space/nl/tab
157 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
165 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
158 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
166 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
159 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
167 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
160
168
161 result.append(escaped_text)
169 result.append(escaped_text)
162
170
163 if op_tag:
171 if op_tag:
164 result.append(u'</%s>' % op_tag)
172 result.append(u'</%s>' % op_tag)
165
173
166 result.append(u'</span>')
174 result.append(u'</span>')
167
175
168 html = ''.join(result)
176 html = ''.join(result)
169 return html
177 return html
170
178
171
179
172 def rollup_tokenstream(tokenstream):
180 def rollup_tokenstream(tokenstream):
173 """
181 """
174 Group a token stream of the format:
182 Group a token stream of the format:
175
183
176 ('class', 'op', 'text')
184 ('class', 'op', 'text')
177 or
185 or
178 ('class', 'text')
186 ('class', 'text')
179
187
180 into
188 into
181
189
182 [('class1',
190 [('class1',
183 [('op1', 'text'),
191 [('op1', 'text'),
184 ('op2', 'text')]),
192 ('op2', 'text')]),
185 ('class2',
193 ('class2',
186 [('op3', 'text')])]
194 [('op3', 'text')])]
187
195
188 This is used to get the minimal tags necessary when
196 This is used to get the minimal tags necessary when
189 rendering to HTML, e.g. for a token stream:
197 rendering to HTML, e.g. for a token stream:
190
198
191 <span class="A"><ins>he</ins>llo</span>
199 <span class="A"><ins>he</ins>llo</span>
192 vs
200 vs
193 <span class="A"><ins>he</ins></span><span class="A">llo</span>
201 <span class="A"><ins>he</ins></span><span class="A">llo</span>
194
202
195 If a 2-tuple is passed in, the output op will be an empty string.
203 If a 2-tuple is passed in, the output op will be an empty string.
196
204
197 eg:
205 eg:
198
206
199 >>> rollup_tokenstream([('classA', '', 'h'),
207 >>> rollup_tokenstream([('classA', '', 'h'),
200 ('classA', 'del', 'ell'),
208 ('classA', 'del', 'ell'),
201 ('classA', '', 'o'),
209 ('classA', '', 'o'),
202 ('classB', '', ' '),
210 ('classB', '', ' '),
203 ('classA', '', 'the'),
211 ('classA', '', 'the'),
204 ('classA', '', 're'),
212 ('classA', '', 're'),
205 ])
213 ])
206
214
207 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
215 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
208 ('classB', [('', ' ')]),
216 ('classB', [('', ' ')]),
209 ('classA', [('', 'there')])]
217 ('classA', [('', 'there')])]
210
218
211 """
219 """
212 if tokenstream and len(tokenstream[0]) == 2:
220 if tokenstream and len(tokenstream[0]) == 2:
213 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
221 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
214
222
215 result = []
223 result = []
216 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
224 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
217 ops = []
225 ops = []
218 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
226 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
219 text_buffer = []
227 text_buffer = []
220 for t_class, t_op, t_text in token_text_list:
228 for t_class, t_op, t_text in token_text_list:
221 text_buffer.append(t_text)
229 text_buffer.append(t_text)
222 ops.append((token_op, ''.join(text_buffer)))
230 ops.append((token_op, ''.join(text_buffer)))
223 result.append((token_class, ops))
231 result.append((token_class, ops))
224 return result
232 return result
225
233
226
234
227 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
235 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
228 """
236 """
229 Converts a list of (token_class, token_text) tuples to a list of
237 Converts a list of (token_class, token_text) tuples to a list of
230 (token_class, token_op, token_text) tuples where token_op is one of
238 (token_class, token_op, token_text) tuples where token_op is one of
231 ('ins', 'del', '')
239 ('ins', 'del', '')
232
240
233 :param old_tokens: list of (token_class, token_text) tuples of old line
241 :param old_tokens: list of (token_class, token_text) tuples of old line
234 :param new_tokens: list of (token_class, token_text) tuples of new line
242 :param new_tokens: list of (token_class, token_text) tuples of new line
235 :param use_diff_match_patch: boolean; if True, uses Google's diff-match-patch
243 :param use_diff_match_patch: boolean; if True, uses Google's diff-match-patch
236 library, which has options to 'smooth' out the character-by-character
244 library, which has options to 'smooth' out the character-by-character
237 differences, making nicer ins/del blocks
245 differences, making nicer ins/del blocks
238 """
246 """
239
247
240 old_tokens_result = []
248 old_tokens_result = []
241 new_tokens_result = []
249 new_tokens_result = []
242
250
243 similarity = difflib.SequenceMatcher(None,
251 similarity = difflib.SequenceMatcher(None,
244 ''.join(token_text for token_class, token_text in old_tokens),
252 ''.join(token_text for token_class, token_text in old_tokens),
245 ''.join(token_text for token_class, token_text in new_tokens)
253 ''.join(token_text for token_class, token_text in new_tokens)
246 ).ratio()
254 ).ratio()
247
255
248 if similarity < 0.6: # return, the blocks are too different
256 if similarity < 0.6: # return, the blocks are too different
249 for token_class, token_text in old_tokens:
257 for token_class, token_text in old_tokens:
250 old_tokens_result.append((token_class, '', token_text))
258 old_tokens_result.append((token_class, '', token_text))
251 for token_class, token_text in new_tokens:
259 for token_class, token_text in new_tokens:
252 new_tokens_result.append((token_class, '', token_text))
260 new_tokens_result.append((token_class, '', token_text))
253 return old_tokens_result, new_tokens_result, similarity
261 return old_tokens_result, new_tokens_result, similarity
254
262
255 token_sequence_matcher = difflib.SequenceMatcher(None,
263 token_sequence_matcher = difflib.SequenceMatcher(None,
256 [x[1] for x in old_tokens],
264 [x[1] for x in old_tokens],
257 [x[1] for x in new_tokens])
265 [x[1] for x in new_tokens])
258
266
259 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
267 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
260 # check the differences by token block types first to give a
268 # check the differences by token block types first to give a
261 # nicer "block"-level replacement vs character diffs
269 # nicer "block"-level replacement vs character diffs
262
270
263 if tag == 'equal':
271 if tag == 'equal':
264 for token_class, token_text in old_tokens[o1:o2]:
272 for token_class, token_text in old_tokens[o1:o2]:
265 old_tokens_result.append((token_class, '', token_text))
273 old_tokens_result.append((token_class, '', token_text))
266 for token_class, token_text in new_tokens[n1:n2]:
274 for token_class, token_text in new_tokens[n1:n2]:
267 new_tokens_result.append((token_class, '', token_text))
275 new_tokens_result.append((token_class, '', token_text))
268 elif tag == 'delete':
276 elif tag == 'delete':
269 for token_class, token_text in old_tokens[o1:o2]:
277 for token_class, token_text in old_tokens[o1:o2]:
270 old_tokens_result.append((token_class, 'del', token_text))
278 old_tokens_result.append((token_class, 'del', token_text))
271 elif tag == 'insert':
279 elif tag == 'insert':
272 for token_class, token_text in new_tokens[n1:n2]:
280 for token_class, token_text in new_tokens[n1:n2]:
273 new_tokens_result.append((token_class, 'ins', token_text))
281 new_tokens_result.append((token_class, 'ins', token_text))
274 elif tag == 'replace':
282 elif tag == 'replace':
275 # if same-type token blocks must be replaced, do a diff on the
283 # if same-type token blocks must be replaced, do a diff on the
276 # characters in the token blocks to show the individual changes
284 # characters in the token blocks to show the individual changes
277
285
278 old_char_tokens = []
286 old_char_tokens = []
279 new_char_tokens = []
287 new_char_tokens = []
280 for token_class, token_text in old_tokens[o1:o2]:
288 for token_class, token_text in old_tokens[o1:o2]:
281 for char in token_text:
289 for char in token_text:
282 old_char_tokens.append((token_class, char))
290 old_char_tokens.append((token_class, char))
283
291
284 for token_class, token_text in new_tokens[n1:n2]:
292 for token_class, token_text in new_tokens[n1:n2]:
285 for char in token_text:
293 for char in token_text:
286 new_char_tokens.append((token_class, char))
294 new_char_tokens.append((token_class, char))
287
295
288 old_string = ''.join([token_text for
296 old_string = ''.join([token_text for
289 token_class, token_text in old_char_tokens])
297 token_class, token_text in old_char_tokens])
290 new_string = ''.join([token_text for
298 new_string = ''.join([token_text for
291 token_class, token_text in new_char_tokens])
299 token_class, token_text in new_char_tokens])
292
300
293 char_sequence = difflib.SequenceMatcher(
301 char_sequence = difflib.SequenceMatcher(
294 None, old_string, new_string)
302 None, old_string, new_string)
295 copcodes = char_sequence.get_opcodes()
303 copcodes = char_sequence.get_opcodes()
296 obuffer, nbuffer = [], []
304 obuffer, nbuffer = [], []
297
305
298 if use_diff_match_patch:
306 if use_diff_match_patch:
299 dmp = diff_match_patch()
307 dmp = diff_match_patch()
300 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
308 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
301 reps = dmp.diff_main(old_string, new_string)
309 reps = dmp.diff_main(old_string, new_string)
302 dmp.diff_cleanupEfficiency(reps)
310 dmp.diff_cleanupEfficiency(reps)
303
311
304 a, b = 0, 0
312 a, b = 0, 0
305 for op, rep in reps:
313 for op, rep in reps:
306 l = len(rep)
314 l = len(rep)
307 if op == 0:
315 if op == 0:
308 for i, c in enumerate(rep):
316 for i, c in enumerate(rep):
309 obuffer.append((old_char_tokens[a+i][0], '', c))
317 obuffer.append((old_char_tokens[a+i][0], '', c))
310 nbuffer.append((new_char_tokens[b+i][0], '', c))
318 nbuffer.append((new_char_tokens[b+i][0], '', c))
311 a += l
319 a += l
312 b += l
320 b += l
313 elif op == -1:
321 elif op == -1:
314 for i, c in enumerate(rep):
322 for i, c in enumerate(rep):
315 obuffer.append((old_char_tokens[a+i][0], 'del', c))
323 obuffer.append((old_char_tokens[a+i][0], 'del', c))
316 a += l
324 a += l
317 elif op == 1:
325 elif op == 1:
318 for i, c in enumerate(rep):
326 for i, c in enumerate(rep):
319 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
327 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
320 b += l
328 b += l
321 else:
329 else:
322 for ctag, co1, co2, cn1, cn2 in copcodes:
330 for ctag, co1, co2, cn1, cn2 in copcodes:
323 if ctag == 'equal':
331 if ctag == 'equal':
324 for token_class, token_text in old_char_tokens[co1:co2]:
332 for token_class, token_text in old_char_tokens[co1:co2]:
325 obuffer.append((token_class, '', token_text))
333 obuffer.append((token_class, '', token_text))
326 for token_class, token_text in new_char_tokens[cn1:cn2]:
334 for token_class, token_text in new_char_tokens[cn1:cn2]:
327 nbuffer.append((token_class, '', token_text))
335 nbuffer.append((token_class, '', token_text))
328 elif ctag == 'delete':
336 elif ctag == 'delete':
329 for token_class, token_text in old_char_tokens[co1:co2]:
337 for token_class, token_text in old_char_tokens[co1:co2]:
330 obuffer.append((token_class, 'del', token_text))
338 obuffer.append((token_class, 'del', token_text))
331 elif ctag == 'insert':
339 elif ctag == 'insert':
332 for token_class, token_text in new_char_tokens[cn1:cn2]:
340 for token_class, token_text in new_char_tokens[cn1:cn2]:
333 nbuffer.append((token_class, 'ins', token_text))
341 nbuffer.append((token_class, 'ins', token_text))
334 elif ctag == 'replace':
342 elif ctag == 'replace':
335 for token_class, token_text in old_char_tokens[co1:co2]:
343 for token_class, token_text in old_char_tokens[co1:co2]:
336 obuffer.append((token_class, 'del', token_text))
344 obuffer.append((token_class, 'del', token_text))
337 for token_class, token_text in new_char_tokens[cn1:cn2]:
345 for token_class, token_text in new_char_tokens[cn1:cn2]:
338 nbuffer.append((token_class, 'ins', token_text))
346 nbuffer.append((token_class, 'ins', token_text))
339
347
340 old_tokens_result.extend(obuffer)
348 old_tokens_result.extend(obuffer)
341 new_tokens_result.extend(nbuffer)
349 new_tokens_result.extend(nbuffer)
342
350
343 return old_tokens_result, new_tokens_result, similarity
351 return old_tokens_result, new_tokens_result, similarity
344
352
345
353
346 class DiffSet(object):
354 class DiffSet(object):
347 """
355 """
348 An object for parsing the diff result from diffs.DiffProcessor and
356 An object for parsing the diff result from diffs.DiffProcessor and
349 adding highlighting, side-by-side/unified renderings and line diffs
357 adding highlighting, side-by-side/unified renderings and line diffs
350 """
358 """
351
359
352 HL_REAL = 'REAL' # highlights using original file, slow
360 HL_REAL = 'REAL' # highlights using original file, slow
353 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
361 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
354 # in the case of multiline code
362 # in the case of multiline code
355 HL_NONE = 'NONE' # no highlighting, fastest
363 HL_NONE = 'NONE' # no highlighting, fastest
356
364
357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
365 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
358 source_repo_name=None,
366 source_repo_name=None,
359 source_node_getter=lambda filename: None,
367 source_node_getter=lambda filename: None,
360 target_node_getter=lambda filename: None,
368 target_node_getter=lambda filename: None,
361 source_nodes=None, target_nodes=None,
369 source_nodes=None, target_nodes=None,
362 max_file_size_limit=150 * 1024, # files over this size will
370 max_file_size_limit=150 * 1024, # files over this size will
363 # use fast highlighting
371 # use fast highlighting
364 comments=None,
372 comments=None,
365 ):
373 ):
366
374
367 self.highlight_mode = highlight_mode
375 self.highlight_mode = highlight_mode
368 self.highlighted_filenodes = {}
376 self.highlighted_filenodes = {}
369 self.source_node_getter = source_node_getter
377 self.source_node_getter = source_node_getter
370 self.target_node_getter = target_node_getter
378 self.target_node_getter = target_node_getter
371 self.source_nodes = source_nodes or {}
379 self.source_nodes = source_nodes or {}
372 self.target_nodes = target_nodes or {}
380 self.target_nodes = target_nodes or {}
373 self.repo_name = repo_name
381 self.repo_name = repo_name
374 self.source_repo_name = source_repo_name or repo_name
382 self.source_repo_name = source_repo_name or repo_name
375 self.comments = comments or {}
383 self.comments = comments or {}
376 self.comments_store = self.comments.copy()
384 self.comments_store = self.comments.copy()
377 self.max_file_size_limit = max_file_size_limit
385 self.max_file_size_limit = max_file_size_limit
378
386
379 def render_patchset(self, patchset, source_ref=None, target_ref=None):
387 def render_patchset(self, patchset, source_ref=None, target_ref=None):
380 diffset = AttributeDict(dict(
388 diffset = AttributeDict(dict(
381 lines_added=0,
389 lines_added=0,
382 lines_deleted=0,
390 lines_deleted=0,
383 changed_files=0,
391 changed_files=0,
384 files=[],
392 files=[],
385 file_stats={},
393 file_stats={},
386 limited_diff=isinstance(patchset, LimitedDiffContainer),
394 limited_diff=isinstance(patchset, LimitedDiffContainer),
387 repo_name=self.repo_name,
395 repo_name=self.repo_name,
388 source_repo_name=self.source_repo_name,
396 source_repo_name=self.source_repo_name,
389 source_ref=source_ref,
397 source_ref=source_ref,
390 target_ref=target_ref,
398 target_ref=target_ref,
391 ))
399 ))
392 for patch in patchset:
400 for patch in patchset:
393 diffset.file_stats[patch['filename']] = patch['stats']
401 diffset.file_stats[patch['filename']] = patch['stats']
394 filediff = self.render_patch(patch)
402 filediff = self.render_patch(patch)
395 filediff.diffset = diffset
403 filediff.diffset = diffset
396 diffset.files.append(filediff)
404 diffset.files.append(filediff)
397 diffset.changed_files += 1
405 diffset.changed_files += 1
398 if not patch['stats']['binary']:
406 if not patch['stats']['binary']:
399 diffset.lines_added += patch['stats']['added']
407 diffset.lines_added += patch['stats']['added']
400 diffset.lines_deleted += patch['stats']['deleted']
408 diffset.lines_deleted += patch['stats']['deleted']
401
409
402 return diffset
410 return diffset
403
411
404 _lexer_cache = {}
412 _lexer_cache = {}
405
413
406 def _get_lexer_for_filename(self, filename, filenode=None):
414 def _get_lexer_for_filename(self, filename, filenode=None):
407 # cached because we might need to call it twice for source/target
415 # cached because we might need to call it twice for source/target
408 if filename not in self._lexer_cache:
416 if filename not in self._lexer_cache:
409 if filenode:
417 if filenode:
410 lexer = filenode.lexer
418 lexer = filenode.lexer
411 extension = filenode.extension
419 extension = filenode.extension
412 else:
420 else:
413 lexer = FileNode.get_lexer(filename=filename)
421 lexer = FileNode.get_lexer(filename=filename)
414 extension = filename.split('.')[-1]
422 extension = filename.split('.')[-1]
415
423
416 lexer = get_custom_lexer(extension) or lexer
424 lexer = get_custom_lexer(extension) or lexer
417 self._lexer_cache[filename] = lexer
425 self._lexer_cache[filename] = lexer
418 return self._lexer_cache[filename]
426 return self._lexer_cache[filename]
419
427
420 def render_patch(self, patch):
428 def render_patch(self, patch):
421 log.debug('rendering diff for %r', patch['filename'])
429 log.debug('rendering diff for %r', patch['filename'])
422
430
423 source_filename = patch['original_filename']
431 source_filename = patch['original_filename']
424 target_filename = patch['filename']
432 target_filename = patch['filename']
425
433
426 source_lexer = plain_text_lexer
434 source_lexer = plain_text_lexer
427 target_lexer = plain_text_lexer
435 target_lexer = plain_text_lexer
428
436
429 if not patch['stats']['binary']:
437 if not patch['stats']['binary']:
430 if self.highlight_mode == self.HL_REAL:
438 if self.highlight_mode == self.HL_REAL:
431 if (source_filename and patch['operation'] in ('D', 'M')
439 if (source_filename and patch['operation'] in ('D', 'M')
432 and source_filename not in self.source_nodes):
440 and source_filename not in self.source_nodes):
433 self.source_nodes[source_filename] = (
441 self.source_nodes[source_filename] = (
434 self.source_node_getter(source_filename))
442 self.source_node_getter(source_filename))
435
443
436 if (target_filename and patch['operation'] in ('A', 'M')
444 if (target_filename and patch['operation'] in ('A', 'M')
437 and target_filename not in self.target_nodes):
445 and target_filename not in self.target_nodes):
438 self.target_nodes[target_filename] = (
446 self.target_nodes[target_filename] = (
439 self.target_node_getter(target_filename))
447 self.target_node_getter(target_filename))
440
448
441 elif self.highlight_mode == self.HL_FAST:
449 elif self.highlight_mode == self.HL_FAST:
442 source_lexer = self._get_lexer_for_filename(source_filename)
450 source_lexer = self._get_lexer_for_filename(source_filename)
443 target_lexer = self._get_lexer_for_filename(target_filename)
451 target_lexer = self._get_lexer_for_filename(target_filename)
444
452
445 source_file = self.source_nodes.get(source_filename, source_filename)
453 source_file = self.source_nodes.get(source_filename, source_filename)
446 target_file = self.target_nodes.get(target_filename, target_filename)
454 target_file = self.target_nodes.get(target_filename, target_filename)
447
455
448 source_filenode, target_filenode = None, None
456 source_filenode, target_filenode = None, None
449
457
450 # TODO: dan: FileNode.lexer works on the content of the file - which
458 # TODO: dan: FileNode.lexer works on the content of the file - which
451 # can be slow - issue #4289 explains a lexer clean up - which once
459 # can be slow - issue #4289 explains a lexer clean up - which once
452 # done can allow caching a lexer for a filenode to avoid the file lookup
460 # done can allow caching a lexer for a filenode to avoid the file lookup
453 if isinstance(source_file, FileNode):
461 if isinstance(source_file, FileNode):
454 source_filenode = source_file
462 source_filenode = source_file
455 #source_lexer = source_file.lexer
463 #source_lexer = source_file.lexer
456 source_lexer = self._get_lexer_for_filename(source_filename)
464 source_lexer = self._get_lexer_for_filename(source_filename)
457 source_file.lexer = source_lexer
465 source_file.lexer = source_lexer
458
466
459 if isinstance(target_file, FileNode):
467 if isinstance(target_file, FileNode):
460 target_filenode = target_file
468 target_filenode = target_file
461 #target_lexer = target_file.lexer
469 #target_lexer = target_file.lexer
462 target_lexer = self._get_lexer_for_filename(target_filename)
470 target_lexer = self._get_lexer_for_filename(target_filename)
463 target_file.lexer = target_lexer
471 target_file.lexer = target_lexer
464
472
465 source_file_path, target_file_path = None, None
473 source_file_path, target_file_path = None, None
466
474
467 if source_filename != '/dev/null':
475 if source_filename != '/dev/null':
468 source_file_path = source_filename
476 source_file_path = source_filename
469 if target_filename != '/dev/null':
477 if target_filename != '/dev/null':
470 target_file_path = target_filename
478 target_file_path = target_filename
471
479
472 source_file_type = source_lexer.name
480 source_file_type = source_lexer.name
473 target_file_type = target_lexer.name
481 target_file_type = target_lexer.name
474
482
475 filediff = AttributeDict({
483 filediff = AttributeDict({
476 'source_file_path': source_file_path,
484 'source_file_path': source_file_path,
477 'target_file_path': target_file_path,
485 'target_file_path': target_file_path,
478 'source_filenode': source_filenode,
486 'source_filenode': source_filenode,
479 'target_filenode': target_filenode,
487 'target_filenode': target_filenode,
480 'source_file_type': source_file_type,
488 'source_file_type': source_file_type,
481 'target_file_type': target_file_type,
489 'target_file_type': target_file_type,
482 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
490 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
483 'operation': patch['operation'],
491 'operation': patch['operation'],
484 'source_mode': patch['stats']['old_mode'],
492 'source_mode': patch['stats']['old_mode'],
485 'target_mode': patch['stats']['new_mode'],
493 'target_mode': patch['stats']['new_mode'],
486 'limited_diff': isinstance(patch, LimitedDiffContainer),
494 'limited_diff': isinstance(patch, LimitedDiffContainer),
487 'hunks': [],
495 'hunks': [],
488 'diffset': self,
496 'diffset': self,
489 })
497 })
490
498
491 for hunk in patch['chunks'][1:]:
499 for hunk in patch['chunks'][1:]:
492 hunkbit = self.parse_hunk(hunk, source_file, target_file)
500 hunkbit = self.parse_hunk(hunk, source_file, target_file)
493 hunkbit.source_file_path = source_file_path
501 hunkbit.source_file_path = source_file_path
494 hunkbit.target_file_path = target_file_path
502 hunkbit.target_file_path = target_file_path
495 filediff.hunks.append(hunkbit)
503 filediff.hunks.append(hunkbit)
496
504
497 left_comments = {}
505 left_comments = {}
498 if source_file_path in self.comments_store:
506 if source_file_path in self.comments_store:
499 for lineno, comments in self.comments_store[source_file_path].items():
507 for lineno, comments in self.comments_store[source_file_path].items():
500 left_comments[lineno] = comments
508 left_comments[lineno] = comments
501
509
502 if target_file_path in self.comments_store:
510 if target_file_path in self.comments_store:
503 for lineno, comments in self.comments_store[target_file_path].items():
511 for lineno, comments in self.comments_store[target_file_path].items():
504 left_comments[lineno] = comments
512 left_comments[lineno] = comments
505 # left comments are ones that we couldn't place in diff lines.
513 # left comments are ones that we couldn't place in diff lines.
506 # could be outdated, or the diff changed and this line is no
514 # could be outdated, or the diff changed and this line is no
507 # longer available
515 # longer available
508 filediff.left_comments = left_comments
516 filediff.left_comments = left_comments
509
517
510 return filediff
518 return filediff
511
519
512 def parse_hunk(self, hunk, source_file, target_file):
520 def parse_hunk(self, hunk, source_file, target_file):
513 result = AttributeDict(dict(
521 result = AttributeDict(dict(
514 source_start=hunk['source_start'],
522 source_start=hunk['source_start'],
515 source_length=hunk['source_length'],
523 source_length=hunk['source_length'],
516 target_start=hunk['target_start'],
524 target_start=hunk['target_start'],
517 target_length=hunk['target_length'],
525 target_length=hunk['target_length'],
518 section_header=hunk['section_header'],
526 section_header=hunk['section_header'],
519 lines=[],
527 lines=[],
520 ))
528 ))
521 before, after = [], []
529 before, after = [], []
522
530
523 for line in hunk['lines']:
531 for line in hunk['lines']:
524
532
525 if line['action'] == 'unmod':
533 if line['action'] == 'unmod':
526 result.lines.extend(
534 result.lines.extend(
527 self.parse_lines(before, after, source_file, target_file))
535 self.parse_lines(before, after, source_file, target_file))
528 after.append(line)
536 after.append(line)
529 before.append(line)
537 before.append(line)
530 elif line['action'] == 'add':
538 elif line['action'] == 'add':
531 after.append(line)
539 after.append(line)
532 elif line['action'] == 'del':
540 elif line['action'] == 'del':
533 before.append(line)
541 before.append(line)
534 elif line['action'] == 'old-no-nl':
542 elif line['action'] == 'old-no-nl':
535 before.append(line)
543 before.append(line)
536 elif line['action'] == 'new-no-nl':
544 elif line['action'] == 'new-no-nl':
537 after.append(line)
545 after.append(line)
538
546
539 result.lines.extend(
547 result.lines.extend(
540 self.parse_lines(before, after, source_file, target_file))
548 self.parse_lines(before, after, source_file, target_file))
541 result.unified = self.as_unified(result.lines)
549 result.unified = self.as_unified(result.lines)
542 result.sideside = result.lines
550 result.sideside = result.lines
543
551
544 return result
552 return result
545
553
546 def parse_lines(self, before_lines, after_lines, source_file, target_file):
554 def parse_lines(self, before_lines, after_lines, source_file, target_file):
547 # TODO: dan: investigate doing the diff comparison and fast highlighting
555 # TODO: dan: investigate doing the diff comparison and fast highlighting
548 # on the entire before and after buffered block lines rather than by
556 # on the entire before and after buffered block lines rather than by
549 # line, this means we can get better 'fast' highlighting if the context
557 # line, this means we can get better 'fast' highlighting if the context
550 # allows it - eg.
558 # allows it - eg.
551 # line 4: """
559 # line 4: """
552 # line 5: this gets highlighted as a string
560 # line 5: this gets highlighted as a string
553 # line 6: """
561 # line 6: """
554
562
555 lines = []
563 lines = []
556
564
557 before_newline = AttributeDict()
565 before_newline = AttributeDict()
558 after_newline = AttributeDict()
566 after_newline = AttributeDict()
559 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
567 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
560 before_newline_line = before_lines.pop(-1)
568 before_newline_line = before_lines.pop(-1)
561 before_newline.content = '\n {}'.format(
569 before_newline.content = '\n {}'.format(
562 render_tokenstream(
570 render_tokenstream(
563 [(x[0], '', x[1])
571 [(x[0], '', x[1])
564 for x in [('nonl', before_newline_line['line'])]]))
572 for x in [('nonl', before_newline_line['line'])]]))
565
573
566 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
574 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
567 after_newline_line = after_lines.pop(-1)
575 after_newline_line = after_lines.pop(-1)
568 after_newline.content = '\n {}'.format(
576 after_newline.content = '\n {}'.format(
569 render_tokenstream(
577 render_tokenstream(
570 [(x[0], '', x[1])
578 [(x[0], '', x[1])
571 for x in [('nonl', after_newline_line['line'])]]))
579 for x in [('nonl', after_newline_line['line'])]]))
572
580
573 while before_lines or after_lines:
581 while before_lines or after_lines:
574 before, after = None, None
582 before, after = None, None
575 before_tokens, after_tokens = None, None
583 before_tokens, after_tokens = None, None
576
584
577 if before_lines:
585 if before_lines:
578 before = before_lines.pop(0)
586 before = before_lines.pop(0)
579 if after_lines:
587 if after_lines:
580 after = after_lines.pop(0)
588 after = after_lines.pop(0)
581
589
582 original = AttributeDict()
590 original = AttributeDict()
583 modified = AttributeDict()
591 modified = AttributeDict()
584
592
585 if before:
593 if before:
586 if before['action'] == 'old-no-nl':
594 if before['action'] == 'old-no-nl':
587 before_tokens = [('nonl', before['line'])]
595 before_tokens = [('nonl', before['line'])]
588 else:
596 else:
589 before_tokens = self.get_line_tokens(
597 before_tokens = self.get_line_tokens(
590 line_text=before['line'],
598 line_text=before['line'],
591 line_number=before['old_lineno'],
599 line_number=before['old_lineno'],
592 file=source_file)
600 file=source_file)
593 original.lineno = before['old_lineno']
601 original.lineno = before['old_lineno']
594 original.content = before['line']
602 original.content = before['line']
595 original.action = self.action_to_op(before['action'])
603 original.action = self.action_to_op(before['action'])
596 original.comments = self.get_comments_for('old',
604 original.comments = self.get_comments_for('old',
597 source_file, before['old_lineno'])
605 source_file, before['old_lineno'])
598
606
599 if after:
607 if after:
600 if after['action'] == 'new-no-nl':
608 if after['action'] == 'new-no-nl':
601 after_tokens = [('nonl', after['line'])]
609 after_tokens = [('nonl', after['line'])]
602 else:
610 else:
603 after_tokens = self.get_line_tokens(
611 after_tokens = self.get_line_tokens(
604 line_text=after['line'], line_number=after['new_lineno'],
612 line_text=after['line'], line_number=after['new_lineno'],
605 file=target_file)
613 file=target_file)
606 modified.lineno = after['new_lineno']
614 modified.lineno = after['new_lineno']
607 modified.content = after['line']
615 modified.content = after['line']
608 modified.action = self.action_to_op(after['action'])
616 modified.action = self.action_to_op(after['action'])
609 modified.comments = self.get_comments_for('new',
617 modified.comments = self.get_comments_for('new',
610 target_file, after['new_lineno'])
618 target_file, after['new_lineno'])
611
619
612 # diff the lines
620 # diff the lines
613 if before_tokens and after_tokens:
621 if before_tokens and after_tokens:
614 o_tokens, m_tokens, similarity = tokens_diff(
622 o_tokens, m_tokens, similarity = tokens_diff(
615 before_tokens, after_tokens)
623 before_tokens, after_tokens)
616 original.content = render_tokenstream(o_tokens)
624 original.content = render_tokenstream(o_tokens)
617 modified.content = render_tokenstream(m_tokens)
625 modified.content = render_tokenstream(m_tokens)
618 elif before_tokens:
626 elif before_tokens:
619 original.content = render_tokenstream(
627 original.content = render_tokenstream(
620 [(x[0], '', x[1]) for x in before_tokens])
628 [(x[0], '', x[1]) for x in before_tokens])
621 elif after_tokens:
629 elif after_tokens:
622 modified.content = render_tokenstream(
630 modified.content = render_tokenstream(
623 [(x[0], '', x[1]) for x in after_tokens])
631 [(x[0], '', x[1]) for x in after_tokens])
624
632
625 if not before_lines and before_newline:
633 if not before_lines and before_newline:
626 original.content += before_newline.content
634 original.content += before_newline.content
627 before_newline = None
635 before_newline = None
628 if not after_lines and after_newline:
636 if not after_lines and after_newline:
629 modified.content += after_newline.content
637 modified.content += after_newline.content
630 after_newline = None
638 after_newline = None
631
639
632 lines.append(AttributeDict({
640 lines.append(AttributeDict({
633 'original': original,
641 'original': original,
634 'modified': modified,
642 'modified': modified,
635 }))
643 }))
636
644
637 return lines
645 return lines
638
646
639 def get_comments_for(self, version, filename, line_number):
647 def get_comments_for(self, version, filename, line_number):
640 if hasattr(filename, 'unicode_path'):
648 if hasattr(filename, 'unicode_path'):
641 filename = filename.unicode_path
649 filename = filename.unicode_path
642
650
643 if not isinstance(filename, basestring):
651 if not isinstance(filename, basestring):
644 return None
652 return None
645
653
646 line_key = {
654 line_key = {
647 'old': 'o',
655 'old': 'o',
648 'new': 'n',
656 'new': 'n',
649 }[version] + str(line_number)
657 }[version] + str(line_number)
650
658
651 if filename in self.comments_store:
659 if filename in self.comments_store:
652 file_comments = self.comments_store[filename]
660 file_comments = self.comments_store[filename]
653 if line_key in file_comments:
661 if line_key in file_comments:
654 return file_comments.pop(line_key)
662 return file_comments.pop(line_key)
655
663
656 def get_line_tokens(self, line_text, line_number, file=None):
664 def get_line_tokens(self, line_text, line_number, file=None):
657 filenode = None
665 filenode = None
658 filename = None
666 filename = None
659
667
660 if isinstance(file, basestring):
668 if isinstance(file, basestring):
661 filename = file
669 filename = file
662 elif isinstance(file, FileNode):
670 elif isinstance(file, FileNode):
663 filenode = file
671 filenode = file
664 filename = file.unicode_path
672 filename = file.unicode_path
665
673
666 if self.highlight_mode == self.HL_REAL and filenode:
674 if self.highlight_mode == self.HL_REAL and filenode:
667 lexer = self._get_lexer_for_filename(filename)
675 lexer = self._get_lexer_for_filename(filename)
668 file_size_allowed = file.size < self.max_file_size_limit
676 file_size_allowed = file.size < self.max_file_size_limit
669 if line_number and file_size_allowed:
677 if line_number and file_size_allowed:
670 return self.get_tokenized_filenode_line(
678 return self.get_tokenized_filenode_line(
671 file, line_number, lexer)
679 file, line_number, lexer)
672
680
673 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
681 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
674 lexer = self._get_lexer_for_filename(filename)
682 lexer = self._get_lexer_for_filename(filename)
675 return list(tokenize_string(line_text, lexer))
683 return list(tokenize_string(line_text, lexer))
676
684
677 return list(tokenize_string(line_text, plain_text_lexer))
685 return list(tokenize_string(line_text, plain_text_lexer))
678
686
679 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
687 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
680
688
681 if filenode not in self.highlighted_filenodes:
689 if filenode not in self.highlighted_filenodes:
682 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
690 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
683 self.highlighted_filenodes[filenode] = tokenized_lines
691 self.highlighted_filenodes[filenode] = tokenized_lines
684 return self.highlighted_filenodes[filenode][line_number - 1]
692 return self.highlighted_filenodes[filenode][line_number - 1]
685
693
686 def action_to_op(self, action):
694 def action_to_op(self, action):
687 return {
695 return {
688 'add': '+',
696 'add': '+',
689 'del': '-',
697 'del': '-',
690 'unmod': ' ',
698 'unmod': ' ',
691 'old-no-nl': ' ',
699 'old-no-nl': ' ',
692 'new-no-nl': ' ',
700 'new-no-nl': ' ',
693 }.get(action, action)
701 }.get(action, action)
694
702
695 def as_unified(self, lines):
703 def as_unified(self, lines):
696 """
704 """
697 Return a generator that yields the lines of a diff in unified order
705 Return a generator that yields the lines of a diff in unified order
698 """
706 """
699 def generator():
707 def generator():
700 buf = []
708 buf = []
701 for line in lines:
709 for line in lines:
702
710
703 if buf and (not line.original or line.original.action == ' '):
711 if buf and (not line.original or line.original.action == ' '):
704 for b in buf:
712 for b in buf:
705 yield b
713 yield b
706 buf = []
714 buf = []
707
715
708 if line.original:
716 if line.original:
709 if line.original.action == ' ':
717 if line.original.action == ' ':
710 yield (line.original.lineno, line.modified.lineno,
718 yield (line.original.lineno, line.modified.lineno,
711 line.original.action, line.original.content,
719 line.original.action, line.original.content,
712 line.original.comments)
720 line.original.comments)
713 continue
721 continue
714
722
715 if line.original.action == '-':
723 if line.original.action == '-':
716 yield (line.original.lineno, None,
724 yield (line.original.lineno, None,
717 line.original.action, line.original.content,
725 line.original.action, line.original.content,
718 line.original.comments)
726 line.original.comments)
719
727
720 if line.modified.action == '+':
728 if line.modified.action == '+':
721 buf.append((
729 buf.append((
722 None, line.modified.lineno,
730 None, line.modified.lineno,
723 line.modified.action, line.modified.content,
731 line.modified.action, line.modified.content,
724 line.modified.comments))
732 line.modified.comments))
725 continue
733 continue
726
734
727 if line.modified:
735 if line.modified:
728 yield (None, line.modified.lineno,
736 yield (None, line.modified.lineno,
729 line.modified.action, line.modified.content,
737 line.modified.action, line.modified.content,
730 line.modified.comments)
738 line.modified.comments)
731
739
732 for b in buf:
740 for b in buf:
733 yield b
741 yield b
734
742
735 return generator()
743 return generator()
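A short usage sketch of the helpers in the file above (hypothetical driver code; the module path is an assumption, not part of this commit): diff two tokenized lines and render them as the compact `<del>`/`<ins>` markup described in `rollup_tokenstream`.

```python
# Hypothetical driver; assumes the first file shown here is importable
# as rhodecode.lib.codeblocks.
from rhodecode.lib.codeblocks import tokens_diff, render_tokenstream

old_line = [('', u'x = 1')]  # (token_class, token_text) pairs
new_line = [('', u'x = 2')]

old_toks, new_toks, similarity = tokens_diff(old_line, new_line)

print(render_tokenstream(old_toks))  # <span>x = <del>1</del></span>
print(render_tokenstream(new_toks))  # <span>x = <ins>2</ins></span>
```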
@@ -1,1107 +1,1131 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2018 RhodeCode GmbH
3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import re
26 import re
27 import collections
27 import collections
28 import difflib
28 import difflib
29 import logging
29 import logging
30
30
31 from itertools import tee, imap
31 from itertools import tee, imap
32
32
33 from rhodecode.lib.vcs.exceptions import VCSError
33 from rhodecode.lib.vcs.exceptions import VCSError
34 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
34 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
35 from rhodecode.lib.utils2 import safe_unicode
35 from rhodecode.lib.utils2 import safe_unicode
36
36
37 log = logging.getLogger(__name__)
37 log = logging.getLogger(__name__)
38
38
39 # define max context; a file with more than this number of lines is unusable
39 # define max context; a file with more than this number of lines is unusable
40 # in a browser anyway
40 # in a browser anyway
41 MAX_CONTEXT = 1024 * 1024
41 MAX_CONTEXT = 1024 * 1024
42
42
43
43
44 class OPS(object):
44 class OPS(object):
45 ADD = 'A'
45 ADD = 'A'
46 MOD = 'M'
46 MOD = 'M'
47 DEL = 'D'
47 DEL = 'D'
48
48
49
49
50 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
50 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
51 """
51 """
52 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
52 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
53
53
54 :param ignore_whitespace: ignore whitespace in the diff
54 :param ignore_whitespace: ignore whitespace in the diff
55 """
55 """
56 # make sure we pass in default context
56 # make sure we pass in default context
57 context = context or 3
57 context = context or 3
58 # protect against IntOverflow when passing HUGE context
58 # protect against IntOverflow when passing HUGE context
59 if context > MAX_CONTEXT:
59 if context > MAX_CONTEXT:
60 context = MAX_CONTEXT
60 context = MAX_CONTEXT
61
61
62 submodules = filter(lambda o: isinstance(o, SubModuleNode),
62 submodules = filter(lambda o: isinstance(o, SubModuleNode),
63 [filenode_new, filenode_old])
63 [filenode_new, filenode_old])
64 if submodules:
64 if submodules:
65 return ''
65 return ''
66
66
67 for filenode in (filenode_old, filenode_new):
67 for filenode in (filenode_old, filenode_new):
68 if not isinstance(filenode, FileNode):
68 if not isinstance(filenode, FileNode):
69 raise VCSError(
69 raise VCSError(
70 "Given object should be FileNode object, not %s"
70 "Given object should be FileNode object, not %s"
71 % filenode.__class__)
71 % filenode.__class__)
72
72
73 repo = filenode_new.commit.repository
73 repo = filenode_new.commit.repository
74 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
74 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
75 new_commit = filenode_new.commit
75 new_commit = filenode_new.commit
76
76
77 vcs_gitdiff = repo.get_diff(
77 vcs_gitdiff = repo.get_diff(
78 old_commit, new_commit, filenode_new.path,
78 old_commit, new_commit, filenode_new.path,
79 ignore_whitespace, context, path1=filenode_old.path)
79 ignore_whitespace, context, path1=filenode_old.path)
80 return vcs_gitdiff
80 return vcs_gitdiff
81
81
82 NEW_FILENODE = 1
82 NEW_FILENODE = 1
83 DEL_FILENODE = 2
83 DEL_FILENODE = 2
84 MOD_FILENODE = 3
84 MOD_FILENODE = 3
85 RENAMED_FILENODE = 4
85 RENAMED_FILENODE = 4
86 COPIED_FILENODE = 5
86 COPIED_FILENODE = 5
87 CHMOD_FILENODE = 6
87 CHMOD_FILENODE = 6
88 BIN_FILENODE = 7
88 BIN_FILENODE = 7
89
89
90
90
91 class LimitedDiffContainer(object):
91 class LimitedDiffContainer(object):
92
92
93 def __init__(self, diff_limit, cur_diff_size, diff):
93 def __init__(self, diff_limit, cur_diff_size, diff):
94 self.diff = diff
94 self.diff = diff
95 self.diff_limit = diff_limit
95 self.diff_limit = diff_limit
96 self.cur_diff_size = cur_diff_size
96 self.cur_diff_size = cur_diff_size
97
97
98 def __getitem__(self, key):
98 def __getitem__(self, key):
99 return self.diff.__getitem__(key)
99 return self.diff.__getitem__(key)
100
100
101 def __iter__(self):
101 def __iter__(self):
102 for l in self.diff:
102 for l in self.diff:
103 yield l
103 yield l
104
104
105
105
106 class Action(object):
106 class Action(object):
107 """
107 """
108 Contains constants for the action value of the lines in a parsed diff.
108 Contains constants for the action value of the lines in a parsed diff.
109 """
109 """
110
110
111 ADD = 'add'
111 ADD = 'add'
112 DELETE = 'del'
112 DELETE = 'del'
113 UNMODIFIED = 'unmod'
113 UNMODIFIED = 'unmod'
114
114
115 CONTEXT = 'context'
115 CONTEXT = 'context'
116 OLD_NO_NL = 'old-no-nl'
116 OLD_NO_NL = 'old-no-nl'
117 NEW_NO_NL = 'new-no-nl'
117 NEW_NO_NL = 'new-no-nl'
118
118
119
119
120 class DiffProcessor(object):
120 class DiffProcessor(object):
121 """
121 """
122 Give it a unified or git diff and it returns a list of the files that were
122 Give it a unified or git diff and it returns a list of the files that were
123 mentioned in the diff together with a dict of meta information that
123 mentioned in the diff together with a dict of meta information that
124 can be used to render it in an HTML template.
124 can be used to render it in an HTML template.
125
125
126 .. note:: Unicode handling
126 .. note:: Unicode handling
127
127
128 The original diffs are a byte sequence and can contain filenames
128 The original diffs are a byte sequence and can contain filenames
129 in mixed encodings. This class generally returns `unicode` objects
129 in mixed encodings. This class generally returns `unicode` objects
130 since the result is intended for presentation to the user.
130 since the result is intended for presentation to the user.
131
131
132 """
132 """
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5

    def __init__(self, diff, format='gitdiff', diff_limit=None,
                 file_limit=None, show_full_diff=True):
        """
        :param diff: A `Diff` object representing a diff from a vcs backend
        :param format: format of the passed diff, `udiff` or `gitdiff`
        :param diff_limit: the diff size that is considered "big"; above it
            the cut-off is triggered. Set to None to always show the full
            diff.
        """
        self._diff = diff
        self._format = format
        self.adds = 0
        self.removes = 0
        # calculate diff size
        self.diff_limit = diff_limit
        self.file_limit = file_limit
        self.show_full_diff = show_full_diff
        self.cur_diff_size = 0
        self.parsed = False
        self.parsed_diff = []

        log.debug('Initialized DiffProcessor with %s mode', format)
        if format == 'gitdiff':
            self.differ = self._highlight_line_difflib
            self._parser = self._parse_gitdiff
        else:
            self.differ = self._highlight_line_udiff
            self._parser = self._new_parse_gitdiff

    def _copy_iterator(self):
        """
        Make a fresh copy of the generator; we should not iterate through
        the original, as it is needed for repeating operations on
        this instance of DiffProcessor.
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _escaper(self, string):
        """
        Escaper for diffs: escapes special HTML chars and checks the diff
        limit.

        :param string:
        """
-
        self.cur_diff_size += len(string)

        if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
            raise DiffLimitExceeded('Diff Limit Exceeded')

-        return safe_unicode(string)\
+        return string \
            .replace('&', '&amp;')\
            .replace('<', '&lt;')\
            .replace('>', '&gt;')

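    # Editor's note: a hedged illustration of what _escaper produces; the
    # `proc` below is a hypothetical instance with show_full_diff=True so
    # the size check never trips. After this changeset the input is expected
    # to already be unicode (diff_splitter decodes it), hence the dropped
    # safe_unicode() call.
    #
    #     proc = DiffProcessor(diff, diff_limit=None, show_full_diff=True)
    #     proc._escaper(u'if a < b and c > d: &')
    #     # -> u'if a &lt; b and c &gt; d: &amp;'
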
    def _line_counter(self, l):
        """
        Checks each line and bumps total adds/removes for this diff.

        :param l:
        """
        if l.startswith('+') and not l.startswith('+++'):
            self.adds += 1
        elif l.startswith('-') and not l.startswith('---'):
            self.removes += 1
        return safe_unicode(l)

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.
        """

        if line['action'] == Action.DELETE:
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = self._token_re.split(old['line'])
        newwords = self._token_re.split(new['line'])
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

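    # Editor's note: a self-contained sketch of the word-level technique
    # used above, not taken from this changeset. difflib.SequenceMatcher
    # yields ('equal'/'replace'/'insert'/'delete', i1, i2, j1, j2) opcodes
    # over the word lists; non-equal spans get wrapped in <del>/<ins>.
    #
    #     import difflib
    #     old_words = ['return', ' ', 'foo']
    #     new_words = ['return', ' ', 'bar']
    #     sm = difflib.SequenceMatcher(None, old_words, new_words)
    #     sm.get_opcodes()
    #     # -> [('equal', 0, 2, 0, 2), ('replace', 2, 3, 2, 3)]
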
    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

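    # Editor's note: an illustrative walk-through of the trimming above,
    # not from the source. The method finds the longest common prefix and
    # suffix of the two lines and tags only the differing middle:
    #
    #     old line: 'color = red'     new line: 'color = blue'
    #     common prefix: 'color = '   common suffix: '' (none)
    #     -> old becomes 'color = <del>red</del>'
    #     -> new becomes 'color = <ins>blue</ins>'
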
    def _clean_line(self, line, command):
        if command in ['+', '-', ' ']:
            # only modify the line if it's actually a diff thing
            line = line[1:]
        return line

    def _parse_gitdiff(self, inline_diff=True):
        _files = []
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header

-            diff = imap(self._escaper, chunk.diff.splitlines(1))
+            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be copy, rename or chmod

                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))
                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything, fall back to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's neither ADD nor DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicitly mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = len(raw_diff) > self.file_limit

                    # changed from the _escaper function so we validate the
                    # size of each file instead of the whole diff.
                    # The diff will hide big files but still show small ones;
                    # from my tests big files are fairly safe to be parsed,
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    exceeds_limit = len(raw_diff) > self.file_limit
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have the operation already extracted, but we simply
                    # mark that it's a diff we won't show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not in full-diff mode, hide the deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
            } for _op, msg in stats['ops'].iteritems()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        if not inline_diff:
            return diff_container(sorted(_files, key=sorter))

        # highlight inline changes
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in (
                                Action.UNMODIFIED, Action.CONTEXT):
                            nextline = lineiter.next()
                            if nextline['action'] in ['unmod', 'context'] or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return diff_container(sorted(_files, key=sorter))

    def _check_large_diff(self):
        log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
        if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
            raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)

    # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
    def _new_parse_gitdiff(self, inline_diff=True):
        _files = []

        # this can be overridden later to a LimitedDiffContainer type
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header
            log.debug('parsing diff %r' % head)

            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'old_mode': None,
                'new_mode': None,
                'ops': {},
            }
            if head['old_mode']:
                stats['old_mode'] = head['old_mode']
            if head['new_mode']:
                stats['new_mode'] = head['new_mode']
            if head['b_mode']:
                stats['new_mode'] = head['b_mode']

            # delete file
            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # new file
            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['old_mode'] = None
                stats['new_mode'] = head['new_file_mode']
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

            # modify operation, can be copy, rename or chmod
            else:
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))

                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['renamed'] = (head['rename_from'], head['rename_to'])
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['copied'] = (head['copy_from'], head['copy_to'])
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything, fall back to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['new_file'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's neither ADD nor DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
-                diff = iter(chunk.diff.splitlines(1))
+                # simulate splitlines, so we keep the line-end part
+                diff = self.diff_splitter(chunk.diff)

                # append each file to the diff size
                raw_chunk_size = len(raw_diff)

                exceeds_limit = raw_chunk_size > self.file_limit
                self.cur_diff_size += raw_chunk_size

                try:
                    # Check each file instead of the whole diff.
                    # Diff will hide big files but still show small ones.
                    # From the tests big files are fairly safe to be parsed
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        log.debug('File `%s` exceeds current file_limit of %s',
                                  safe_unicode(head['b_path']), self.file_limit)
                        raise DiffLimitExceeded(
                            'File Limit %s Exceeded', self.file_limit)

                    self._check_large_diff()

                    raw_diff, chunks, _stats = self._new_parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicitly mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have the operation already extracted, but we simply
                    # mark that it's a diff we won't show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            # Hide content of deleted node by setting empty chunks
            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not in full-diff mode, hide the deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            chunks.insert(
                0, [{'old_lineno': '',
                     'new_lineno': '',
                     'action': Action.CONTEXT,
                     'line': msg,
                     } for _op, msg in stats['ops'].iteritems()
                    if _op not in [MOD_FILENODE]])

            original_filename = safe_unicode(head['a_path'])
            _files.append({
                'original_filename': original_filename,
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        return diff_container(sorted(_files, key=sorter))

    # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
-    def _parse_lines(self, diff):
+    def _parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.
        """

-        lineiter = iter(diff)
        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
-            line = lineiter.next()
+            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's the first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

-                line = lineiter.next()
+                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

-                    line = lineiter.next()
+                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of the diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass
        return ''.join(raw_diff), chunks, stats

    # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

-        diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
-
        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
+                # match a header, e.g. '@@ -0,0 +1 @@\n'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of the diff
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            pass

        return ''.join(raw_diff), chunks, stats

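    # Editor's note: a hedged sketch of the hunk dict _new_parse_lines
    # builds for a header such as '@@ -1,2 +1,3 @@ def foo():'; the line
    # entries below are illustrative, not taken from a real run:
    #
    #     {'section_header': ' def foo():',
    #      'source_start': 1, 'source_length': 2,
    #      'target_start': 1, 'target_length': 3,
    #      'lines': [{'old_lineno': 1, 'new_lineno': 1,
    #                 'action': 'unmod', 'line': u'...'}, ...]}
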
    def _safe_id(self, idstring):
        """Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

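    # Editor's note: illustrative input/output for _safe_id, worked out by
    # hand from the two re.sub calls above, so treat it as a sketch:
    #
    #     proc._safe_id('My File (v2).txt')
    #     # -> 'my_file_v2txt'
    #     # (spaces -> '_'; '(', ')' and '.' stripped; lowercased)
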
+    @classmethod
+    def diff_splitter(cls, string):
+        """
+        Diff split that emulates .splitlines() but works only on \n
+        """
+        if not string:
+            return
+        elif string == '\n':
+            yield u'\n'
+        else:
+
+            has_newline = string.endswith('\n')
+            elements = string.split('\n')
+            if has_newline:
+                # skip the last element, as it's an empty string left over
+                # from the trailing newline
+                elements = elements[:-1]
+
+            len_elements = len(elements)
+
+            for cnt, line in enumerate(elements, start=1):
+                last_line = cnt == len_elements
+                if last_line and not has_newline:
+                    yield safe_unicode(line)
+                else:
+                    yield safe_unicode(line) + '\n'

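    # Editor's note: a hedged sketch of why diff_splitter exists (this
    # changeset replaces .splitlines(1) with it). str.splitlines() also
    # breaks on \r, \x0b, \x1c and friends, which corrupts diff lines that
    # merely *contain* such bytes; splitting on '\n' only keeps them intact:
    #
    #     list(DiffProcessor.diff_splitter('a\rb\nc\n'))
    #     # -> [u'a\rb\n', u'c\n']     ('\r' kept inside the line)
    #     'a\rb\nc\n'.splitlines(True)
    #     # -> ['a\r', 'b\n', 'c\n']   (splitlines breaks on '\r' too)
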
    def prepare(self, inline_diff=True):
        """
        Prepare the passed udiff for HTML rendering.

        :return: A list of dicts with diff information.
        """
        parsed = self._parser(inline_diff=inline_diff)
        self.parsed = True
        self.parsed_diff = parsed
        return parsed

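    # Editor's note: minimal end-to-end usage, assuming `vcs_gitdiff` is a
    # diff object from the vcs backend (as produced earlier in this module)
    # and that any non-'gitdiff' format string such as 'newdiff' selects the
    # newer parser; a sketch, not part of the changeset:
    #
    #     proc = DiffProcessor(vcs_gitdiff, format='newdiff',
    #                          diff_limit=1024 * 1024, file_limit=1024 * 10,
    #                          show_full_diff=False)
    #     parsed = proc.prepare()        # list of per-file dicts
    #     added, removed = proc.stat()
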
    def as_raw(self, diff_lines=None):
        """
        Returns raw diff as a byte string
        """
        return self._diff.raw

    def as_html(self, table_class='code-difftable', line_class='line',
                old_lineno_class='lineno old', new_lineno_class='lineno new',
                code_class='code', enable_comments=False, parsed_lines=None):
        """
        Return the given diff as an html table with customized css classes
        """
        # TODO(marcink): not sure how to pass in translator
        # here in an efficient way, leave the _ for proper gettext extraction
        _ = lambda s: s

        def _link_to_if(condition, label, url):
            """
            Generates a link if the condition is met, or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s" class="tooltip"
                        title="%(title)s">%(label)s</a>''' % {
                    'title': _('Click to select line'),
                    'url': url,
                    'label': label
                }
            else:
                return label
        if not self.parsed:
            self.prepare()

        diff_lines = self.parsed_diff
        if parsed_lines:
            diff_lines = parsed_lines

        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })

        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new

                    if change['action'] != Action.CONTEXT:
                        anchor_link = True
                    else:
                        anchor_link = False

                    ###########################################################
                    # COMMENT ICONS
                    ###########################################################
                    _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')

                    if enable_comments and change['action'] != Action.CONTEXT:
                        _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')

                    _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')

                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    code_classes = [code_class]
                    if (not enable_comments or
                            change['action'] == Action.CONTEXT):
                        code_classes.append('no-comment')
                    _html.append('\t<td class="%s">' % ' '.join(code_classes))
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })

                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

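    # Editor's note: a hedged usage sketch for as_html (not from the
    # source); `proc` is the DiffProcessor instance from the earlier
    # example:
    #
    #     html = proc.as_html(table_class='code-difftable',
    #                         enable_comments=True)
    #     # -> '<table class="code-difftable">...' or None for an empty diff
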
    def stat(self):
        """
        Returns a tuple of added and removed lines for this instance.
        """
        return self.adds, self.removes

    def get_context_of_line(
            self, path, diff_line=None, context_before=3, context_after=3):
        """
        Returns the context lines for the specified diff line.

        :type diff_line: :class:`DiffLineNumber`
        """
        assert self.parsed, "DiffProcessor is not initialized."

        if None not in diff_line:
            raise ValueError(
                "Cannot specify both line numbers: {}".format(diff_line))

        file_diff = self._get_file_diff(path)
        chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

        first_line_to_include = max(idx - context_before, 0)
        first_line_after_context = idx + context_after + 1
        context_lines = chunk[first_line_to_include:first_line_after_context]

        line_contents = [
            _context_line(line) for line in context_lines
            if _is_diff_content(line)]
        # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
        # Once they are fixed, we can drop this line here.
        if line_contents:
            line_contents[-1] = (
                line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
        return line_contents

    def find_context(self, path, context, offset=0):
        """
        Finds the given `context` inside of the diff.

        Use the parameter `offset` to specify which offset the target line has
        inside of the given `context`. This way the correct diff line will be
        returned.

        :param offset: Shall be used to specify the offset of the main line
            within the given `context`.
        """
        if offset < 0 or offset >= len(context):
            raise ValueError(
                "Only positive values up to the length of the context "
                "minus one are allowed.")

        matches = []
        file_diff = self._get_file_diff(path)

        for chunk in file_diff['chunks']:
            context_iter = iter(context)
            for line_idx, line in enumerate(chunk):
                try:
                    if _context_line(line) == context_iter.next():
                        continue
                except StopIteration:
                    matches.append((line_idx, chunk))
                    context_iter = iter(context)

            # Increment the position and trigger StopIteration
            # if we had a match at the end
            line_idx += 1
            try:
                context_iter.next()
            except StopIteration:
                matches.append((line_idx, chunk))

        effective_offset = len(context) - offset
        found_at_diff_lines = [
            _line_to_diff_line_number(chunk[idx - effective_offset])
            for idx, chunk in matches]

        return found_at_diff_lines

    def _get_file_diff(self, path):
        for file_diff in self.parsed_diff:
            if file_diff['filename'] == path:
                break
        else:
            raise FileNotInDiffException("File {} not in diff".format(path))
        return file_diff

    def _find_chunk_line_index(self, file_diff, diff_line):
        for chunk in file_diff['chunks']:
            for idx, line in enumerate(chunk):
                if line['old_lineno'] == diff_line.old:
                    return chunk, idx
                if line['new_lineno'] == diff_line.new:
                    return chunk, idx
        raise LineNotInDiffException(
            "The line {} is not part of the diff.".format(diff_line))


def _is_diff_content(line):
    return line['action'] in (
        Action.UNMODIFIED, Action.ADD, Action.DELETE)


def _context_line(line):
    return (line['action'], line['line'])


DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])


def _line_to_diff_line_number(line):
    new_line_no = line['new_lineno'] or None
    old_line_no = line['old_lineno'] or None
    return DiffLineNumber(old=old_line_no, new=new_line_no)


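# Editor's note: a small sketch of DiffLineNumber; normally exactly one
# side is set, and get_context_of_line() rejects values where both sides
# are set via its `None not in diff_line` check:
#
#     DiffLineNumber(old=None, new=42)    # line 42 on the new side
#     DiffLineNumber(old=7, new=None)     # line 7 on the old side

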
class FileNotInDiffException(Exception):
    """
    Raised when the context for a missing file is requested.

    If you request the context for a line in a file which is not part of the
    given diff, then this exception is raised.
    """


class LineNotInDiffException(Exception):
    """
    Raised when the context for a missing line is requested.

    If you request the context for a line in a file and this line is not
    part of the given diff, then this exception is raised.
    """


class DiffLimitExceeded(Exception):
    pass
@@ -1,331 +1,311 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2016-2018 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

21 import pytest
21 import pytest
22 from pygments.lexers import get_lexer_by_name
22 from pygments.lexers import get_lexer_by_name
23
23
24 from rhodecode.tests import no_newline_id_generator
24 from rhodecode.tests import no_newline_id_generator
25 from rhodecode.lib.codeblocks import (
25 from rhodecode.lib.codeblocks import (
26 tokenize_string, split_token_stream, rollup_tokenstream,
26 tokenize_string, split_token_stream, rollup_tokenstream,
27 render_tokenstream)
27 render_tokenstream)
28
28
29
29
30 class TestTokenizeString(object):
30 class TestTokenizeString(object):
31
31
32 python_code = '''
32 python_code = '''
33 import this
33 import this
34
34
35 var = 6
35 var = 6
36 print "this"
36 print "this"
37
37
38 '''
38 '''
39
39
40 def test_tokenize_as_python(self):
40 def test_tokenize_as_python(self):
41 lexer = get_lexer_by_name('python')
41 lexer = get_lexer_by_name('python')
42 tokens = list(tokenize_string(self.python_code, lexer))
42 tokens = list(tokenize_string(self.python_code, lexer))
43
43
44 assert tokens == [
44 assert tokens == [
45 ('', u'\n'),
45 ('', u'\n'),
46 ('', u' '),
46 ('', u' '),
47 ('kn', u'import'),
47 ('kn', u'import'),
48 ('', u' '),
48 ('', u' '),
49 ('nn', u'this'),
49 ('nn', u'this'),
50 ('', u'\n'),
50 ('', u'\n'),
51 ('', u'\n'),
51 ('', u'\n'),
52 ('', u' '),
52 ('', u' '),
53 ('n', u'var'),
53 ('n', u'var'),
54 ('', u' '),
54 ('', u' '),
55 ('o', u'='),
55 ('o', u'='),
56 ('', u' '),
56 ('', u' '),
57 ('mi', u'6'),
57 ('mi', u'6'),
58 ('', u'\n'),
58 ('', u'\n'),
59 ('', u' '),
59 ('', u' '),
60 ('k', u'print'),
60 ('k', u'print'),
61 ('', u' '),
61 ('', u' '),
62 ('s2', u'"'),
62 ('s2', u'"'),
63 ('s2', u'this'),
63 ('s2', u'this'),
64 ('s2', u'"'),
64 ('s2', u'"'),
65 ('', u'\n'),
65 ('', u'\n'),
66 ('', u'\n'),
66 ('', u'\n'),
67 ('', u' ')
67 ('', u' ')
68 ]
68 ]
69
69
70 def test_tokenize_as_text(self):
70 def test_tokenize_as_text(self):
71 lexer = get_lexer_by_name('text')
71 lexer = get_lexer_by_name('text')
72 tokens = list(tokenize_string(self.python_code, lexer))
72 tokens = list(tokenize_string(self.python_code, lexer))
73
73
74 assert tokens == [
74 assert tokens == [
75 ('',
75 ('',
76 u'\n import this\n\n var = 6\n print "this"\n\n ')
76 u'\n import this\n\n var = 6\n print "this"\n\n ')
77 ]
77 ]
78
78
79
79
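
# Editor's aside: the single-token result above is the reason this changeset
# skips per-token highlighting for text lexers. Pygments' TextLexer passes
# the whole input through as one Text token (plus a guaranteed trailing
# newline), so there is nothing useful to highlight per line. A minimal
# standalone check, using only pygments itself:

def _demo_text_lexer_is_passthrough():
    from pygments import lex
    from pygments.lexers.special import TextLexer

    tokens = list(lex(u'first line\nsecond line', TextLexer()))
    # one single Text token, with a newline appended by the lexer
    assert len(tokens) == 1
    assert tokens[0][1] == u'first line\nsecond line\n'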


class TestSplitTokenStream(object):

    def test_split_token_stream(self):
        lines = list(split_token_stream(
            [('type1', 'some\ntext'), ('type2', 'more\n')]))

        assert lines == [
            [('type1', u'some')],
            [('type1', u'text'), ('type2', u'more')],
            [('type2', u'')],
        ]

-    def test_split_token_stream_other_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='m'))
-
-        assert lines == [
-            [('type1', 'so')],
-            [('type1', 'e\ntext'), ('type2', '')],
-            [('type2', 'ore\n')],
-        ]
-
-    def test_split_token_stream_without_char(self):
-        lines = list(split_token_stream(
-            [('type1', 'some\ntext'), ('type2', 'more\n')],
-            split_string='z'))
-
-        assert lines == [
-            [('type1', 'some\ntext'), ('type2', 'more\n')]
-        ]
-
    def test_split_token_stream_single(self):
        lines = list(split_token_stream(
-            [('type1', '\n')], split_string='\n'))
+            [('type1', '\n')]))

        assert lines == [
            [('type1', '')],
            [('type1', '')],
        ]

    def test_split_token_stream_single_repeat(self):
        lines = list(split_token_stream(
-            [('type1', '\n\n\n')], split_string='\n'))
+            [('type1', '\n\n\n')]))

        assert lines == [
            [('type1', '')],
            [('type1', '')],
            [('type1', '')],
            [('type1', '')],
        ]

    def test_split_token_stream_multiple_repeat(self):
        lines = list(split_token_stream(
-            [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
+            [('type1', '\n\n'), ('type2', '\n\n')]))

        assert lines == [
            [('type1', '')],
            [('type1', '')],
            [('type1', ''), ('type2', '')],
            [('type2', '')],
            [('type2', '')],
        ]

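# Editor's sketch of the splitting contract exercised above (one possible
# implementation, not the RhodeCode original): every '\n' ends a line, each
# line is a list of (token_class, text) tuples, and a token's class is
# carried over to every fragment it is split into.

def _demo_split_token_stream(tokens):
    line = []
    for token_class, text in tokens:
        parts = text.split('\n')
        for part in parts[:-1]:
            line.append((token_class, part))
            yield line
            line = []
        line.append((token_class, parts[-1]))
    yield line

# list(_demo_split_token_stream([('type1', 'some\ntext'), ('type2', 'more\n')]))
# reproduces the expectation of test_split_token_stream above.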

class TestRollupTokens(object):

    @pytest.mark.parametrize('tokenstream,output', [
        ([],
         []),
        ([('A', 'hell'), ('A', 'o')], [
            ('A', [
                ('', 'hello')]),
        ]),
        ([('A', 'hell'), ('B', 'o')], [
            ('A', [
                ('', 'hell')]),
            ('B', [
                ('', 'o')]),
        ]),
        ([('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')], [
            ('A', [
                ('', 'hello')]),
            ('B', [
                ('', ' ')]),
            ('A', [
                ('', 'there')]),
        ]),
    ])
    def test_rollup_tokenstream_without_ops(self, tokenstream, output):
        assert list(rollup_tokenstream(tokenstream)) == output

    @pytest.mark.parametrize('tokenstream,output', [
        ([],
         []),
        ([('A', '', 'hell'), ('A', '', 'o')], [
            ('A', [
                ('', 'hello')]),
        ]),
        ([('A', '', 'hell'), ('B', '', 'o')], [
            ('A', [
                ('', 'hell')]),
            ('B', [
                ('', 'o')]),
        ]),
        ([('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')], [
            ('A', [
                ('', 'h')]),
            ('B', [
                ('', 'e')]),
            ('C', [
                ('', 'y')]),
        ]),
        ([('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')], [
            ('A', [
                ('', 'he')]),
            ('C', [
                ('', 'y')]),
        ]),
        ([('A', 'ins', 'h'), ('A', 'ins', 'e')], [
            ('A', [
                ('ins', 'he')
            ]),
        ]),
        ([('A', 'ins', 'h'), ('A', 'del', 'e')], [
            ('A', [
                ('ins', 'h'),
                ('del', 'e')
            ]),
        ]),
        ([('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')], [
            ('A', [
                ('ins', 'h'),
            ]),
            ('B', [
                ('del', 'ey'),
            ]),
        ]),
        ([('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')], [
            ('A', [
                ('ins', 'h'),
                ('del', 'e'),
            ]),
            ('B', [
                ('del', 'y'),
            ]),
        ]),
        ([('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')], [
            ('A', [
                ('', 'some'),
                ('ins', 'new'),
                ('', 'name'),
            ]),
        ]),
    ])
    def test_rollup_tokenstream_with_ops(self, tokenstream, output):
        assert list(rollup_tokenstream(tokenstream)) == output

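# Editor's sketch of the rollup contract above (one possible implementation,
# not the RhodeCode original): consecutive tokens are grouped first by token
# class, then by op, and the grouped texts are concatenated.
# itertools.groupby does exactly this double grouping.

def _demo_rollup_tokenstream(tokenstream):
    from itertools import groupby

    # normalize the op-less 2-tuple form to (class, op, text)
    tokenstream = [
        t if len(t) == 3 else (t[0], '', t[1]) for t in tokenstream]
    result = []
    for token_class, class_group in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, op_group in groupby(class_group, lambda t: t[1]):
            ops.append((token_op, ''.join(t[2] for t in op_group)))
        result.append((token_class, ops))
    return result

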
class TestRenderTokenStream(object):

    @pytest.mark.parametrize('tokenstream,output', [
        (
            [],
            '',
        ),
        (
            [('', '', u'')],
            '<span></span>',
        ),
        (
            [('', '', u'text')],
            '<span>text</span>',
        ),
        (
            [('A', '', u'')],
            '<span class="A"></span>',
        ),
        (
            [('A', '', u'hello')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', '', u'hel'), ('A', '', u'lo')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', '', u'two\n'), ('A', '', u'lines')],
            '<span class="A">two\nlines</span>',
        ),
        (
            [('A', '', u'\nthree\n'), ('A', '', u'lines')],
            '<span class="A">\nthree\nlines</span>',
        ),
        (
            [('', '', u'\n'), ('A', '', u'line')],
            '<span>\n</span><span class="A">line</span>',
        ),
        (
            [('', 'ins', u'\n'), ('A', '', u'line')],
            '<span><ins>\n</ins></span><span class="A">line</span>',
        ),
        (
            [('A', '', u'hel'), ('A', 'ins', u'lo')],
            '<span class="A">hel<ins>lo</ins></span>',
        ),
        (
            [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
            '<span class="A">hel<ins>lo</ins></span>',
        ),
        (
            [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
            '<span class="A">hel<ins>l</ins><del>o</del></span>',
        ),
        (
            [('A', '', u'hel'), ('B', '', u'lo')],
            '<span class="A">hel</span><span class="B">lo</span>',
        ),
        (
            [('A', '', u'hel'), ('B', 'ins', u'lo')],
            '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
        ),
    ], ids=no_newline_id_generator)
    def test_render_tokenstream_with_ops(self, tokenstream, output):
        html = render_tokenstream(tokenstream)
        assert html == output

    @pytest.mark.parametrize('tokenstream,output', [
        (
            [('A', u'hel'), ('A', u'lo')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', u'hel'), ('A', u'l'), ('A', u'o')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', u'hel'), ('A', u'l'), ('A', u'o')],
            '<span class="A">hello</span>',
        ),
        (
            [('A', u'hel'), ('B', u'lo')],
            '<span class="A">hel</span><span class="B">lo</span>',
        ),
        (
            [('A', u'hel'), ('B', u'lo')],
            '<span class="A">hel</span><span class="B">lo</span>',
        ),
    ])
    def test_render_tokenstream_without_ops(self, tokenstream, output):
        html = render_tokenstream(tokenstream)
        assert html == output
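
# Editor's sketch of the rendering contract above (one possible
# implementation, not the RhodeCode original; it reuses the rollup sketch
# defined after TestRollupTokens): each rolled-up class group becomes a
# <span>, and non-empty ops wrap their text in <ins>/<del>. The real
# render_tokenstream also HTML-escapes token text (see html_escape in
# rhodecode.lib.helpers); escaping is omitted here to keep the sketch short.

def _demo_render_tokenstream(tokenstream):
    html = []
    for token_class, op_tokens in _demo_rollup_tokenstream(tokenstream):
        attr = ' class="%s"' % token_class if token_class else ''
        html.append('<span%s>' % attr)
        for op_tag, token_text in op_tokens:
            if op_tag:
                html.append('<%s>%s</%s>' % (op_tag, token_text, op_tag))
            else:
                html.append(token_text)
        html.append('</span>')
    return ''.join(html)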
@@ -1,813 +1,831 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2010-2018 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import textwrap

import pytest

from rhodecode.lib.diffs import (
    DiffProcessor,
    NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
    CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
-from rhodecode.tests.fixture import Fixture
+from rhodecode.tests.fixture import Fixture, no_newline_id_generator
from rhodecode.lib.vcs.backends.git.repository import GitDiff
from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff

fixture = Fixture()


def test_diffprocessor_as_html_with_comments():
    raw_diff = textwrap.dedent('''
        diff --git a/setup.py b/setup.py
        index 5b36422..cfd698e 100755
        --- a/setup.py
        +++ b/setup.py
        @@ -2,7 +2,7 @@
         #!/usr/bin/python
         # Setup file for X
         # Copyright (C) No one
        -
        +x
         try:
             from setuptools import setup, Extension
         except ImportError:
    ''')
    diff = GitDiff(raw_diff)
    processor = DiffProcessor(diff)
    processor.prepare()

    # Note that the cell with the context in line 5 (in the html) has the
    # no-comment class, which prevents the add-comment icon from being
    # displayed.
    expected_html = textwrap.dedent('''
        <table class="code-difftable">
        <tr class="line context">
        <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td class="lineno old">...</td>
        <td class="lineno new">...</td>
        <td class="code no-comment">
        <pre>@@ -2,7 +2,7 @@
        </pre>
        </td>
        </tr>
        <tr class="line unmod">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
        title="Click to select line">2</a></td>
        <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
        title="Click to select line">2</a></td>
        <td class="code">
        <pre>#!/usr/bin/python
        </pre>
        </td>
        </tr>
        <tr class="line unmod">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
        title="Click to select line">3</a></td>
        <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
        title="Click to select line">3</a></td>
        <td class="code">
        <pre># Setup file for X
        </pre>
        </td>
        </tr>
        <tr class="line unmod">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
        title="Click to select line">4</a></td>
        <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
        title="Click to select line">4</a></td>
        <td class="code">
        <pre># Copyright (C) No one
        </pre>
        </td>
        </tr>
        <tr class="line del">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
        title="Click to select line">5</a></td>
        <td class="lineno new"><a href="#setuppy_n" class="tooltip"
        title="Click to select line"></a></td>
        <td class="code">
        <pre>
        </pre>
        </td>
        </tr>
        <tr class="line add">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td class="lineno old"><a href="#setuppy_o" class="tooltip"
        title="Click to select line"></a></td>
        <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
        title="Click to select line">5</a></td>
        <td class="code">
        <pre><ins>x</ins>
        </pre>
        </td>
        </tr>
        <tr class="line unmod">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
        title="Click to select line">6</a></td>
        <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
        title="Click to select line">6</a></td>
        <td class="code">
        <pre>try:
        </pre>
        </td>
        </tr>
        <tr class="line unmod">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
        title="Click to select line">7</a></td>
        <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
        title="Click to select line">7</a></td>
        <td class="code">
        <pre> from setuptools import setup, Extension
        </pre>
        </td>
        </tr>
        <tr class="line unmod">
        <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
        <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
        title="Click to select line">8</a></td>
        <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
        title="Click to select line">8</a></td>
        <td class="code">
        <pre>except ImportError:
        </pre>
        </td>
        </tr>
        </table>
    ''').strip()
    html = processor.as_html(enable_comments=True).replace('\t', ' ')

    assert html == expected_html

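
# Editor's sketch (hypothetical helper; field names are inferred from the
# assertions in this file): prepare() returns one dict per changed file, and
# the tests below read 'filename' and 'raw_diff' from it, while DIFF_FIXTURES
# further down assert on the per-file stats ('added', 'deleted', 'binary',
# 'ops').

def _demo_iterate_prepared_diff(raw_diff):
    processor = DiffProcessor(GitDiff(raw_diff))
    for item in processor.prepare():
        yield item['filename'], item['raw_diff']
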
-class TestMixedFilenameEncodings:
+class TestMixedFilenameEncodings(object):

    @pytest.fixture(scope="class")
    def raw_diff(self):
        return fixture.load_resource(
            'hg_diff_mixed_filename_encodings.diff')

    @pytest.fixture
    def processor(self, raw_diff):
        diff = MercurialDiff(raw_diff)
        processor = DiffProcessor(diff)
        return processor

    def test_filenames_are_decoded_to_unicode(self, processor):
        diff_data = processor.prepare()
        filenames = [item['filename'] for item in diff_data]
        assert filenames == [
            u'späcial-utf8.txt', u'sp�cial-cp1252.txt', u'sp�cial-latin1.txt']

    def test_raw_diff_is_decoded_to_unicode(self, processor):
        diff_data = processor.prepare()
        raw_diffs = [item['raw_diff'] for item in diff_data]
        new_file_message = u'\nnew file mode 100644\n'
        expected_raw_diffs = [
            u' a/späcial-utf8.txt b/späcial-utf8.txt' + new_file_message,
            u' a/sp�cial-cp1252.txt b/sp�cial-cp1252.txt' + new_file_message,
            u' a/sp�cial-latin1.txt b/sp�cial-latin1.txt' + new_file_message]
        assert raw_diffs == expected_raw_diffs

    def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
        assert processor.as_raw() == raw_diff


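# Editor's note (standard-library sketch): the U+FFFD replacement characters
# in the expectations above are what a lenient decode produces for bytes that
# are not valid in the attempted encoding:
#
#     >>> 'sp\xe4cial'.decode('utf-8', 'replace')
#     u'sp\ufffdcial'

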
# TODO: mikhail: format the following data structure properly
DIFF_FIXTURES = [
    ('hg',
     'hg_diff_add_single_binary_file.diff',
     [('US Warszawa.jpg', 'A',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {NEW_FILENODE: 'new file 100755',
                BIN_FILENODE: 'binary diff hidden'}}),
      ]),
    ('hg',
     'hg_diff_mod_single_binary_file.diff',
     [('US Warszawa.jpg', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {MOD_FILENODE: 'modified file',
                BIN_FILENODE: 'binary diff hidden'}}),
      ]),
    ('hg',
     'hg_diff_mod_single_file_and_rename_and_chmod.diff',
     [('README', 'M',
       {'added': 3,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file',
                RENAMED_FILENODE: 'file renamed from README.rst to README',
                CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
      ]),
    ('hg',
     'hg_diff_no_newline.diff',
     [('server.properties', 'M',
       {'added': 2,
        'deleted': 1,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ]),
    ('hg',
     'hg_diff_mod_file_and_rename.diff',
     [('README.rst', 'M',
       {'added': 3,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file',
                RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
      ]),
    ('hg',
     'hg_diff_del_single_binary_file.diff',
     [('US Warszawa.jpg', 'D',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {DEL_FILENODE: 'deleted file',
                BIN_FILENODE: 'binary diff hidden'}}),
      ]),
    ('hg',
     'hg_diff_chmod_and_mod_single_binary_file.diff',
     [('gravatar.png', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
                BIN_FILENODE: 'binary diff hidden'}}),
      ]),
    ('hg',
     'hg_diff_chmod.diff',
     [('file', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
      ]),
    ('hg',
     'hg_diff_rename_file.diff',
     [('file_renamed', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
      ]),
    ('hg',
     'hg_diff_rename_and_chmod_file.diff',
     [('README', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
                RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
      ]),
    ('hg',
     'hg_diff_binary_and_normal.diff',
     [('img/baseline-10px.png', 'A',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {NEW_FILENODE: 'new file 100644',
                BIN_FILENODE: 'binary diff hidden'}}),
      ('js/jquery/hashgrid.js', 'A',
       {'added': 340,
        'deleted': 0,
        'binary': False,
        'ops': {NEW_FILENODE: 'new file 100755'}}),
      ('index.html', 'M',
       {'added': 3,
        'deleted': 2,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('less/docs.less', 'M',
       {'added': 34,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('less/scaffolding.less', 'M',
       {'added': 1,
        'deleted': 3,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('readme.markdown', 'M',
       {'added': 1,
        'deleted': 10,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('img/baseline-20px.png', 'D',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {DEL_FILENODE: 'deleted file',
                BIN_FILENODE: 'binary diff hidden'}}),
      ('js/global.js', 'D',
       {'added': 0,
        'deleted': 75,
        'binary': False,
        'ops': {DEL_FILENODE: 'deleted file'}})
      ]),
    ('git',
     'git_diff_chmod.diff',
     [('work-horus.xls', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
      ]),
    ('git',
     'git_diff_rename_file.diff',
     [('file.xls', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {
            RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
      ]),
    ('git',
     'git_diff_mod_single_binary_file.diff',
     [('US Warszawa.jpg', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {MOD_FILENODE: 'modified file',
                BIN_FILENODE: 'binary diff hidden'}})
      ]),
    ('git',
     'git_diff_binary_and_normal.diff',
     [('img/baseline-10px.png', 'A',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {NEW_FILENODE: 'new file 100644',
                BIN_FILENODE: 'binary diff hidden'}}),
      ('js/jquery/hashgrid.js', 'A',
       {'added': 340,
        'deleted': 0,
        'binary': False,
        'ops': {NEW_FILENODE: 'new file 100755'}}),
      ('index.html', 'M',
       {'added': 3,
        'deleted': 2,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('less/docs.less', 'M',
       {'added': 34,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('less/scaffolding.less', 'M',
       {'added': 1,
        'deleted': 3,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('readme.markdown', 'M',
       {'added': 1,
        'deleted': 10,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('img/baseline-20px.png', 'D',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {DEL_FILENODE: 'deleted file',
                BIN_FILENODE: 'binary diff hidden'}}),
      ('js/global.js', 'D',
       {'added': 0,
        'deleted': 75,
        'binary': False,
        'ops': {DEL_FILENODE: 'deleted file'}}),
      ]),
    ('hg',
     'diff_with_diff_data.diff',
     [('vcs/backends/base.py', 'M',
       {'added': 18,
        'deleted': 2,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('vcs/backends/git/repository.py', 'M',
       {'added': 46,
        'deleted': 15,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('vcs/backends/hg.py', 'M',
       {'added': 22,
        'deleted': 3,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('vcs/tests/test_git.py', 'M',
       {'added': 5,
        'deleted': 5,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ('vcs/tests/test_repository.py', 'M',
       {'added': 174,
        'deleted': 2,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}}),
      ]),
    ('hg',
     'hg_diff_copy_file.diff',
     [('file2', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
      ]),
    ('hg',
     'hg_diff_copy_and_modify_file.diff',
     [('file3', 'M',
       {'added': 1,
        'deleted': 0,
        'binary': False,
        'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
                MOD_FILENODE: 'modified file'}}),
      ]),
    ('hg',
     'hg_diff_copy_and_chmod_file.diff',
     [('file4', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
                CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
      ]),
    ('hg',
     'hg_diff_copy_chmod_and_edit_file.diff',
     [('file5', 'M',
       {'added': 2,
        'deleted': 1,
        'binary': False,
        'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
                CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
                MOD_FILENODE: 'modified file'}})]),

    # Diffs to validate rename and copy file with space in its name
    ('git',
     'git_diff_rename_file_with_spaces.diff',
     [('file_with_ two spaces.txt', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {
            RENAMED_FILENODE: (
                'file renamed from file_with_ spaces.txt to file_with_ '
                ' two spaces.txt')}
        }), ]),
    ('hg',
     'hg_diff_rename_file_with_spaces.diff',
     [('file_changed _.txt', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {
            RENAMED_FILENODE: (
                'file renamed from file_ with update.txt to file_changed'
                ' _.txt')}
        }), ]),
    ('hg',
     'hg_diff_copy_file_with_spaces.diff',
     [('file_copied_ with spaces.txt', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {
            COPIED_FILENODE: (
                'file copied from file_changed_without_spaces.txt to'
                ' file_copied_ with spaces.txt')}
        }),
      ]),

    # special signs from git
    ('git',
     'git_diff_binary_special_files.diff',
     [('css/_Icon\\r', 'A',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {NEW_FILENODE: 'new file 100644',
                BIN_FILENODE: 'binary diff hidden'}
        }),
      ]),
    ('git',
     'git_diff_binary_special_files_2.diff',
     [('css/Icon\\r', 'A',
       {'added': 0,
        'deleted': 0,
        'binary': True,
        'ops': {NEW_FILENODE: 'new file 100644', }
        }),
      ]),

    ('svn',
     'svn_diff_binary_add_file.diff',
     [('intl.dll', 'A',
       {'added': 0,
        'deleted': 0,
        'binary': False,
        'ops': {NEW_FILENODE: 'new file 10644',
                #TODO(Marcink): depends on binary detection on svn patches
                # BIN_FILENODE: 'binary diff hidden'
                }
        }),
      ]),

    ('svn',
     'svn_diff_multiple_changes.diff',
     [('trunk/doc/images/SettingsOverlay.png', 'M',
       {'added': 0,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file',
                #TODO(Marcink): depends on binary detection on svn patches
                # BIN_FILENODE: 'binary diff hidden'
                }
        }),
      ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
       {'added': 89,
        'deleted': 34,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
       {'added': 66,
        'deleted': 21,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/src/Changelog.txt', 'M',
       {'added': 2,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
       {'added': 19,
        'deleted': 13,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
       {'added': 16,
        'deleted': 1,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
       {'added': 3,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/src/TortoiseProc/resource.h', 'M',
       {'added': 2,
        'deleted': 0,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ('trunk/src/TortoiseShell/ShellCache.h', 'M',
       {'added': 50,
        'deleted': 1,
        'binary': False,
        'ops': {MOD_FILENODE: 'modified file'}
        }),
      ]),


    # TODO: mikhail: do we still need this?
    # (
    #     'hg',
    #     'large_diff.diff',
    #     [
    #         ('.hgignore', 'A', {
    #             'deleted': 0, 'binary': False, 'added': 3, 'ops': {
    #                 1: 'new file 100644'}}),
    #         (
    #             'MANIFEST.in', 'A',
    #             {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
    #                 1: 'new file 100644'}}),
    #         (
    #             'README.txt', 'A',
    #             {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
    #                 1: 'new file 100644'}}),
    #         (
    #             'development.ini', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'docs/index.txt', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'ez_setup.py', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'hgapp.py', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'hgwebdir.config', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'pylons_app.egg-info/PKG-INFO', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'pylons_app.egg-info/SOURCES.txt', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
    #                     1: 'new file 100644'}}),
    #         (
    #             'pylons_app.egg-info/dependency_links.txt', 'A', {
    #                 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
    #                     1: 'new file 100644'}}),
    #     ]
    # ),
]

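# Editor's sketch (hypothetical test, not part of the original file) of how a
# fixture table like DIFF_FIXTURES is typically consumed: each (vcs, file,
# expected) triple becomes one parametrized case that loads the stored diff
# and compares DiffProcessor's prepared output against the expectations. The
# leading underscore keeps pytest from collecting the sketch.

@pytest.mark.parametrize('vcs, diff_file, expected', DIFF_FIXTURES[:1])
def _demo_diff_fixture_case(vcs, diff_file, expected):
    diff_class = {'git': GitDiff, 'hg': MercurialDiff, 'svn': SubversionDiff}
    raw_diff = fixture.load_resource(diff_file)
    diff_data = DiffProcessor(diff_class[vcs](raw_diff)).prepare()
    assert len(diff_data) == len(expected)
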
DIFF_FIXTURES_WITH_CONTENT = [
    (
        'hg', 'hg_diff_single_file_change_newline.diff',
        [
            (
                'file_b',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1 @@\n+test_content b\n'  # diff
            ),
        ],
    ),
    (
        'hg', 'hg_diff_double_file_change_newline.diff',
        [
            (
                'file_b',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1 @@\n+test_content b\n'  # diff
            ),
            (
                'file_c',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1 @@\n+test_content c\n'  # diff
            ),
        ],
    ),
    (
        'hg', 'hg_diff_double_file_change_double_newline.diff',
        [
            (
                'file_b',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1 @@\n+test_content b\n\n'  # diff
            ),
            (
                'file_c',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1 @@\n+test_content c\n'  # diff
            ),
        ],
    ),
    (
        'hg', 'hg_diff_four_file_change_newline.diff',
        [
            (
                'file',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1,1 @@\n+file\n'  # diff
            ),
            (
                'file2',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1,1 @@\n+another line\n'  # diff
            ),
            (
                'file3',  # filename
                'A',  # change
                {  # stats
                    'added': 1,
                    'deleted': 0,
                    'binary': False,
                    'ops': {NEW_FILENODE: 'new file 100644', }
                },
                '@@ -0,0 +1,1 @@\n+newline\n'  # diff
            ),
            (
                'file4',  # filename
761 'A', # change
761 'A', # change
762 { # stats
762 { # stats
763 'added': 1,
763 'added': 1,
764 'deleted': 0,
764 'deleted': 0,
765 'binary': False,
765 'binary': False,
766 'ops': {NEW_FILENODE: 'new file 100644', }
766 'ops': {NEW_FILENODE: 'new file 100644', }
767 },
767 },
768 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
768 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
769 ),
769 ),
770 ],
770 ],
771 ),
771 ),
772
772
773 ]
773 ]
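

# Illustrative sketch (not part of the original suite): each entry above
# unpacks into a vcs alias, a fixture file name, and a list of per-file
# expectations shaped as (filename, change, stats, raw_diff).
def _fixture_shape_sketch():
    vcs, fixture_file, expected = DIFF_FIXTURES_WITH_CONTENT[0]
    filename, change, stats, raw_diff = expected[0]
    assert (vcs, filename, change) == ('hg', 'file_b', 'A')
    assert raw_diff.endswith('\n')  # the trailing newline is part of the data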


diff_class = {
    'git': GitDiff,
    'hg': MercurialDiff,
    'svn': SubversionDiff,
}
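

# Minimal end-to-end sketch (illustrative, mirroring the fixtures below):
# wrap raw diff text in the backend-specific class selected from the mapping
# above, then hand it to DiffProcessor for parsing.
def _process_raw_diff_sketch(vcs, diff_txt):
    diff = diff_class[vcs](diff_txt)       # e.g. 'hg' -> MercurialDiff
    return DiffProcessor(diff).prepare()   # parsed per-file entries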


@pytest.fixture(params=DIFF_FIXTURES)
def diff_fixture(request):
    vcs, diff_fixture, expected = request.param
    diff_txt = fixture.load_resource(diff_fixture)
    diff = diff_class[vcs](diff_txt)
    return diff, expected


def test_diff_lib(diff_fixture):
    diff, expected_data = diff_fixture
    diff_proc = DiffProcessor(diff)
    diff_proc_d = diff_proc.prepare()
    data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
    assert expected_data == data
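

# Sketch of the expected output shape (illustrative; assumes the hg fixture
# listed above): prepare() yields dicts keyed by 'filename', 'operation',
# 'stats' and 'raw_diff', which the tests project back into tuples.
def _prepare_output_shape_sketch():
    diff_txt = fixture.load_resource('hg_diff_single_file_change_newline.diff')
    first = DiffProcessor(diff_class['hg'](diff_txt)).prepare()[0]
    assert (first['filename'], first['operation']) == ('file_b', 'A')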


@pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
def diff_fixture_w_content(request):
    vcs, diff_fixture, expected = request.param
    diff_txt = fixture.load_resource(diff_fixture)
    diff = diff_class[vcs](diff_txt)
    return diff, expected


def test_diff_lib_newlines(diff_fixture_w_content):
    diff, expected_data = diff_fixture_w_content
    diff_proc = DiffProcessor(diff)
    diff_proc_d = diff_proc.prepare()
    data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
            for x in diff_proc_d]
    assert expected_data == data
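

# Hypothetical extra check (not part of the original change): the point of
# these fixtures is that 'raw_diff' preserves trailing newlines byte-for-byte,
# including the double newline in the second fixture file above.
def _raw_diff_newline_sketch():
    diff_txt = fixture.load_resource(
        'hg_diff_double_file_change_double_newline.diff')
    diff_proc_d = DiffProcessor(diff_class['hg'](diff_txt)).prepare()
    assert diff_proc_d[0]['raw_diff'].endswith('\n\n')  # double newline kept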


@pytest.mark.parametrize('input_str', [
    '',
    '\n',
    '\n\n',
    'First\n+second',
    'First\n+second\n',

    '\n\n\n Multi \n\n\n',
    '\n\n\n Multi beginning',
    'Multi end \n\n\n',
    'Multi end',
    '@@ -0,0 +1 @@\n+test_content \n\n b\n'
], ids=no_newline_id_generator)
def test_splitlines(input_str):
    result = DiffProcessor.diff_splitter(input_str)
    assert list(result) == input_str.splitlines(True)
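

# Hypothetical companion check (not part of the original change): the
# contract tested above is that diff_splitter keeps line terminators,
# exactly like str.splitlines(True) does.
def _splitlines_keepends_sketch():
    assert '@@ -0,0 +1 @@\n+x\n'.splitlines(True) == ['@@ -0,0 +1 @@\n', '+x\n']
    assert list(DiffProcessor.diff_splitter('+x\n')) == ['+x\n']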