##// END OF EJS Templates
diffs: fixed problem with rendering no newline at the end of file markers....
marcink -
r2380:8531a2ca default
parent child Browse files
Show More
@@ -0,0 +1,10 b''
1 diff --git a/server.properties b/server.properties
2 --- a/server.properties
3 +++ b/server.properties
4 @@ -1,2 +1,3 @@
5 property=value
6 -anotherProperty=value
7 \ No newline at end of file
8 +anotherProperty=value
9 +newProperty=super_important_value
10 \ No newline at end of file
@@ -1,711 +1,735 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from rhodecode.lib.helpers import (
28 28 get_lexer_for_filenode, html_escape, get_custom_lexer)
29 29 from rhodecode.lib.utils2 import AttributeDict
30 30 from rhodecode.lib.vcs.nodes import FileNode
31 31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 32 from rhodecode.lib.diffs import LimitedDiffContainer
33 33 from pygments.lexers import get_lexer_by_name
34 34
35 35 plain_text_lexer = get_lexer_by_name(
36 36 'text', stripall=False, stripnl=False, ensurenl=False)
37 37
38 38
39 39 log = logging.getLogger()
40 40
41 41
def filenode_as_lines_tokens(filenode, lexer=None):
    """
    Tokenize the content of ``filenode`` with pygments and return it as a
    list of lines, each line being a list of ``(token_class, text)`` tuples.

    :param filenode: file node whose content is tokenized
    :param lexer: optional explicit lexer; when not given (or falsy) one is
        derived from the filenode itself
    """
    requested_lexer = lexer
    effective_lexer = lexer if lexer else get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              effective_lexer, filenode, requested_lexer)
    token_stream = tokenize_string(filenode.content, effective_lexer)
    return list(split_token_stream(token_stream, split_string='\n'))
51 51
52 52
def tokenize_string(content, lexer):
    """
    Yield ``(token_class, token_text)`` pairs for ``content`` produced by
    the given pygments ``lexer``, preserving every original newline and
    all whitespace.
    """
    # disable any stripping/normalizing the lexer might do, so the token
    # stream round-trips to the exact original content
    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False

    stream = lex(content, lexer)
    for ttype, text in stream:
        yield pygment_token_class(ttype), text
64 64
65 65
def split_token_stream(tokens, split_string=u'\n'):
    """
    Split a stream of ``(token_class, text)`` tuples on ``split_string``,
    yielding one list of tuples per resulting line.

    A token whose text contains the separator is broken across lines; the
    trailing remainder (possibly empty) starts the next line.

    >>> list(split_token_stream([('tc', 'some\\ntext'), ('tc', 'more\\n')]))
    [[('tc', 'some')], [('tc', 'text'), ('tc', 'more')], [('tc', '')]]
    """
    current_line = []
    for token_class, token_text in tokens:
        pieces = token_text.split(split_string)
        # every piece except the last terminates a line
        for piece in pieces[:-1]:
            current_line.append((token_class, piece))
            yield current_line
            current_line = []

        current_line.append((token_class, pieces[-1]))

    if current_line:
        yield current_line
87 87
88 88
def filenode_as_annotated_lines_tokens(filenode):
    """
    Group the tokenized lines of ``filenode`` by their annotation (commit).

    Yields ``(annotation, [(line_no, tokens), ...])`` tuples, one per
    unbroken run of lines that share the same annotation; the annotation
    is ``None`` when no annotation exists for a line, eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # avoid invoking the same commit getter twice

    def _lookup_commit(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_by_line = {}
    for line_no, commit_id, commit_getter, _line_content in filenode.annotate:
        annotation_by_line[line_no] = _lookup_commit(commit_id, commit_getter)

    annotated = (
        (annotation_by_line.get(line_no), line_no, tokens)
        for line_no, tokens
        in enumerate(filenode_as_lines_tokens(filenode), 1))

    for annotation, group in groupby(annotated, lambda item: item[0]):
        yield annotation, [(line_no, tokens)
                           for _, line_no, tokens in group]
138 138
139 139
def render_tokenstream(tokenstream):
    """
    Render a token stream to an HTML string: each token class becomes a
    ``<span>`` (with a class attribute when the class is non-empty) and
    each op (``ins``/``del``) is wrapped in its own tag inside the span.
    Token text is HTML-escaped.
    """
    parts = []
    for token_class, ops_with_text in rollup_tokenstream(tokenstream):

        if token_class:
            opening = u'<span class="%s">' % token_class
        else:
            opening = u'<span>'
        parts.append(opening)

        for op_tag, token_text in ops_with_text:

            if op_tag:
                parts.append(u'<%s>' % op_tag)

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
            parts.append(html_escape(token_text))

            if op_tag:
                parts.append(u'</%s>' % op_tag)

        parts.append(u'</span>')

    return ''.join(parts)
170 170
171 171
def rollup_tokenstream(tokenstream):
    """
    Collapse a token stream of ``('class', 'op', 'text')`` (or
    ``('class', 'text')``) tuples into::

        [('class1',
           [('op1', 'text'),
            ('op2', 'text')]),
         ('class2',
           [('op3', 'text')])]

    merging consecutive tokens that share both class and op. This yields
    the minimal tags necessary when rendering to html, eg:

        <span class="A"><ins>he</ins>llo</span>
    vs
        <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If 2-tuples are passed in, the output op will be an empty string.

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])
    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]
    """
    # normalize 2-tuples to 3-tuples with an empty op
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((cls, '', text) for cls, text in tokenstream)

    rolled = []
    for token_class, class_group in groupby(tokenstream, lambda tok: tok[0]):
        op_chunks = []
        for token_op, op_group in groupby(class_group, lambda tok: tok[1]):
            merged_text = ''.join(text for _cls, _op, text in op_group)
            op_chunks.append((token_op, merged_text))
        rolled.append((token_class, op_chunks))
    return rolled
225 225
226 226
def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    :return: ``(old_tokens_result, new_tokens_result, similarity)`` where
        the result lists carry the 3-tuples and similarity is the
        difflib ratio of the two joined texts (0.0 - 1.0)
    """

    old_tokens_result = []
    new_tokens_result = []

    # similarity is computed on the raw concatenated text, ignoring
    # token classes entirely
    similarity = difflib.SequenceMatcher(None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        # no intra-line diff: emit both sides unchanged with empty ops
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    # first pass: diff at token-text granularity (whole tokens)
    token_sequence_matcher = difflib.SequenceMatcher(None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            # explode the replaced ranges into per-character tokens so a
            # second, character-level diff can be run over them
            old_char_tokens = []
            new_char_tokens = []
            for token_class, token_text in old_tokens[o1:o2]:
                for char in token_text:
                    old_char_tokens.append((token_class, char))

            for token_class, token_text in new_tokens[n1:n2]:
                for char in token_text:
                    new_char_tokens.append((token_class, char))

            old_string = ''.join([token_text for
                token_class, token_text in old_char_tokens])
            new_string = ''.join([token_text for
                token_class, token_text in new_char_tokens])

            char_sequence = difflib.SequenceMatcher(
                None, old_string, new_string)
            copcodes = char_sequence.get_opcodes()
            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                # walk dmp's (op, text) chunks keeping separate cursors
                # into the old (a) and new (b) char-token lists so each
                # emitted character keeps its original token class
                a, b = 0, 0
                for op, rep in reps:
                    l = len(rep)
                    if op == 0:  # 0 = equal in both sides
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += l
                        b += l
                    elif op == -1:  # -1 = deletion from the old side
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += l
                    elif op == 1:  # 1 = insertion on the new side
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += l
            else:
                # fallback: plain difflib opcodes over the characters
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        # a char-level replace is rendered as del + ins
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
344 344
345 345
class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest

    def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                 source_repo_name=None,
                 source_node_getter=lambda filename: None,
                 target_node_getter=lambda filename: None,
                 source_nodes=None, target_nodes=None,
                 max_file_size_limit=150 * 1024,  # files over this size will
                                                  # use fast highlighting
                 comments=None,
                 ):
        """
        :param highlight_mode: one of HL_REAL/HL_FAST/HL_NONE
        :param repo_name: name of the (target) repository
        :param source_repo_name: name of the source repository, defaults
            to ``repo_name`` when not given
        :param source_node_getter: callable(filename) -> FileNode or None
        :param target_node_getter: callable(filename) -> FileNode or None
        :param source_nodes: optional pre-fetched {filename: FileNode} map
        :param target_nodes: optional pre-fetched {filename: FileNode} map
        :param max_file_size_limit: files over this size use fast highlighting
        :param comments: {filename: {line_key: comments}} store; consumed
            (popped) while rendering, leftovers become "left comments"
        """

        self.highlight_mode = highlight_mode
        self.highlighted_filenodes = {}  # per-instance cache of tokenized files
        self.source_node_getter = source_node_getter
        self.target_node_getter = target_node_getter
        self.source_nodes = source_nodes or {}
        self.target_nodes = target_nodes or {}
        self.repo_name = repo_name
        self.source_repo_name = source_repo_name or repo_name
        self.comments = comments or {}
        # shallow copy: entries are popped while placing comments on lines
        self.comments_store = self.comments.copy()
        self.max_file_size_limit = max_file_size_limit

    def render_patchset(self, patchset, source_ref=None, target_ref=None):
        """
        Render every patch in ``patchset`` and return an AttributeDict
        diffset with per-file renderings plus aggregate add/delete stats.
        """
        diffset = AttributeDict(dict(
            lines_added=0,
            lines_deleted=0,
            changed_files=0,
            files=[],
            file_stats={},
            limited_diff=isinstance(patchset, LimitedDiffContainer),
            repo_name=self.repo_name,
            source_repo_name=self.source_repo_name,
            source_ref=source_ref,
            target_ref=target_ref,
        ))
        for patch in patchset:
            diffset.file_stats[patch['filename']] = patch['stats']
            filediff = self.render_patch(patch)
            filediff.diffset = diffset
            diffset.files.append(filediff)
            diffset.changed_files += 1
            # binary files carry no meaningful line counts
            if not patch['stats']['binary']:
                diffset.lines_added += patch['stats']['added']
                diffset.lines_deleted += patch['stats']['deleted']

        return diffset

    # NOTE: class-level, so the filename -> lexer cache is shared across
    # all DiffSet instances (lexer choice depends only on the filename)
    _lexer_cache = {}

    def _get_lexer_for_filename(self, filename, filenode=None):
        """Return (and cache) the lexer to use for ``filename``."""
        # cached because we might need to call it twice for source/target
        if filename not in self._lexer_cache:
            if filenode:
                lexer = filenode.lexer
                extension = filenode.extension
            else:
                lexer = FileNode.get_lexer(filename=filename)
                extension = filename.split('.')[-1]

            # a user-configured lexer for the extension wins over the guess
            lexer = get_custom_lexer(extension) or lexer
            self._lexer_cache[filename] = lexer
        return self._lexer_cache[filename]

    def render_patch(self, patch):
        """
        Render a single patch dict into an AttributeDict filediff with
        parsed hunks, lexer-derived file types and leftover comments.
        """
        # lazy %-args: only formatted when DEBUG is enabled
        log.debug('rendering diff for %r', patch['filename'])

        source_filename = patch['original_filename']
        target_filename = patch['filename']

        source_lexer = plain_text_lexer
        target_lexer = plain_text_lexer

        if not patch['stats']['binary']:
            if self.highlight_mode == self.HL_REAL:
                # fetch full file nodes so whole-file highlighting is possible
                if (source_filename and patch['operation'] in ('D', 'M')
                    and source_filename not in self.source_nodes):
                    self.source_nodes[source_filename] = (
                        self.source_node_getter(source_filename))

                if (target_filename and patch['operation'] in ('A', 'M')
                    and target_filename not in self.target_nodes):
                    self.target_nodes[target_filename] = (
                        self.target_node_getter(target_filename))

            elif self.highlight_mode == self.HL_FAST:
                source_lexer = self._get_lexer_for_filename(source_filename)
                target_lexer = self._get_lexer_for_filename(target_filename)

        source_file = self.source_nodes.get(source_filename, source_filename)
        target_file = self.target_nodes.get(target_filename, target_filename)

        source_filenode, target_filenode = None, None

        # TODO: dan: FileNode.lexer works on the content of the file - which
        # can be slow - issue #4289 explains a lexer clean up - which once
        # done can allow caching a lexer for a filenode to avoid the file lookup
        if isinstance(source_file, FileNode):
            source_filenode = source_file
            #source_lexer = source_file.lexer
            source_lexer = self._get_lexer_for_filename(source_filename)
            source_file.lexer = source_lexer

        if isinstance(target_file, FileNode):
            target_filenode = target_file
            #target_lexer = target_file.lexer
            target_lexer = self._get_lexer_for_filename(target_filename)
            target_file.lexer = target_lexer

        source_file_path, target_file_path = None, None

        # '/dev/null' marks an absent side (pure add / pure delete)
        if source_filename != '/dev/null':
            source_file_path = source_filename
        if target_filename != '/dev/null':
            target_file_path = target_filename

        source_file_type = source_lexer.name
        target_file_type = target_lexer.name

        filediff = AttributeDict({
            'source_file_path': source_file_path,
            'target_file_path': target_file_path,
            'source_filenode': source_filenode,
            'target_filenode': target_filenode,
            # fix: these two were previously assigned from the opposite
            # lexer (source got target's type and vice versa)
            'source_file_type': source_file_type,
            'target_file_type': target_file_type,
            'patch': {'filename': patch['filename'], 'stats': patch['stats']},
            'operation': patch['operation'],
            'source_mode': patch['stats']['old_mode'],
            'target_mode': patch['stats']['new_mode'],
            'limited_diff': isinstance(patch, LimitedDiffContainer),
            'hunks': [],
            'diffset': self,
        })

        # chunks[0] is the header pseudo-chunk; real hunks start at index 1
        for hunk in patch['chunks'][1:]:
            hunkbit = self.parse_hunk(hunk, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunks.append(hunkbit)

        left_comments = {}
        if source_file_path in self.comments_store:
            for lineno, comments in self.comments_store[source_file_path].items():
                left_comments[lineno] = comments

        if target_file_path in self.comments_store:
            for lineno, comments in self.comments_store[target_file_path].items():
                left_comments[lineno] = comments
        # left comments are one that we couldn't place in diff lines.
        # could be outdated, or the diff changed and this line is no
        # longer available
        filediff.left_comments = left_comments

        return filediff

    def parse_hunk(self, hunk, source_file, target_file):
        """
        Parse a single hunk dict into an AttributeDict with rendered
        lines plus unified and side-by-side orderings.
        """
        result = AttributeDict(dict(
            source_start=hunk['source_start'],
            source_length=hunk['source_length'],
            target_start=hunk['target_start'],
            target_length=hunk['target_length'],
            section_header=hunk['section_header'],
            lines=[],
        ))
        before, after = [], []

        for line in hunk['lines']:

            if line['action'] == 'unmod':
                # an unmodified line flushes the pending add/del run first
                result.lines.extend(
                    self.parse_lines(before, after, source_file, target_file))
                after.append(line)
                before.append(line)
            elif line['action'] == 'add':
                after.append(line)
            elif line['action'] == 'del':
                before.append(line)
            elif line['action'] == 'old-no-nl':
                before.append(line)
            elif line['action'] == 'new-no-nl':
                after.append(line)

        result.lines.extend(
            self.parse_lines(before, after, source_file, target_file))
        result.unified = self.as_unified(result.lines)
        result.sideside = result.lines

        return result

    def parse_lines(self, before_lines, after_lines, source_file, target_file):
        """
        Pair up buffered before/after lines, tokenize/highlight them and
        return a list of AttributeDicts with ``original``/``modified``
        renderings. Trailing 'no newline at end of file' marker lines are
        detached first and re-appended to the content of the last real
        line on the corresponding side.
        """
        # TODO: dan: investigate doing the diff comparison and fast highlighting
        # on the entire before and after buffered block lines rather than by
        # line, this means we can get better 'fast' highlighting if the context
        # allows it - eg.
        # line 4: """
        # line 5: this gets highlighted as a string
        # line 6: """

        lines = []

        before_newline = AttributeDict()
        after_newline = AttributeDict()
        if before_lines and before_lines[-1]['action'] == 'old-no-nl':
            before_newline_line = before_lines.pop(-1)
            before_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', before_newline_line['line'])]]))

        if after_lines and after_lines[-1]['action'] == 'new-no-nl':
            after_newline_line = after_lines.pop(-1)
            after_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', after_newline_line['line'])]]))

        while before_lines or after_lines:
            before, after = None, None
            before_tokens, after_tokens = None, None

            if before_lines:
                before = before_lines.pop(0)
            if after_lines:
                after = after_lines.pop(0)

            original = AttributeDict()
            modified = AttributeDict()

            if before:
                if before['action'] == 'old-no-nl':
                    before_tokens = [('nonl', before['line'])]
                else:
                    before_tokens = self.get_line_tokens(
                        line_text=before['line'],
                        line_number=before['old_lineno'],
                        file=source_file)
                original.lineno = before['old_lineno']
                original.content = before['line']
                original.action = self.action_to_op(before['action'])
                original.comments = self.get_comments_for('old',
                    source_file, before['old_lineno'])

            if after:
                if after['action'] == 'new-no-nl':
                    after_tokens = [('nonl', after['line'])]
                else:
                    after_tokens = self.get_line_tokens(
                        line_text=after['line'], line_number=after['new_lineno'],
                        file=target_file)
                modified.lineno = after['new_lineno']
                modified.content = after['line']
                modified.action = self.action_to_op(after['action'])
                modified.comments = self.get_comments_for('new',
                    target_file, after['new_lineno'])

            # diff the lines
            if before_tokens and after_tokens:
                o_tokens, m_tokens, similarity = tokens_diff(
                    before_tokens, after_tokens)
                original.content = render_tokenstream(o_tokens)
                modified.content = render_tokenstream(m_tokens)
            elif before_tokens:
                original.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in before_tokens])
            elif after_tokens:
                modified.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in after_tokens])

            # append the 'no newline' marker to the last line of each side
            # NOTE(review): if one side is exhausted before the other, the
            # marker is appended on the iteration that consumed that side's
            # last line, so `content` is set by then — verify for the edge
            # case of a side consisting only of a no-nl marker line
            if not before_lines and before_newline:
                original.content += before_newline.content
                before_newline = None
            if not after_lines and after_newline:
                modified.content += after_newline.content
                after_newline = None

            lines.append(AttributeDict({
                'original': original,
                'modified': modified,
            }))

        return lines

    def get_comments_for(self, version, filename, line_number):
        """
        Pop and return the comments stored for the given side
        (``version`` is 'old' or 'new'), file and line, or None.
        """
        if hasattr(filename, 'unicode_path'):
            filename = filename.unicode_path

        if not isinstance(filename, basestring):
            return None

        # comment keys are 'o<line>' for the old side, 'n<line>' for the new
        line_key = {
            'old': 'o',
            'new': 'n',
        }[version] + str(line_number)

        if filename in self.comments_store:
            file_comments = self.comments_store[filename]
            if line_key in file_comments:
                # pop so leftovers can be collected as "left comments"
                return file_comments.pop(line_key)

    def get_line_tokens(self, line_text, line_number, file=None):
        """
        Tokenize one line for rendering, honouring the configured
        highlight mode; falls back to plain-text tokens.
        """
        filenode = None
        filename = None

        if isinstance(file, basestring):
            filename = file
        elif isinstance(file, FileNode):
            filenode = file
            filename = file.unicode_path

        if self.highlight_mode == self.HL_REAL and filenode:
            lexer = self._get_lexer_for_filename(filename)
            file_size_allowed = file.size < self.max_file_size_limit
            if line_number and file_size_allowed:
                # whole-file highlighting: correct for multiline constructs
                return self.get_tokenized_filenode_line(
                    file, line_number, lexer)

        if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
            lexer = self._get_lexer_for_filename(filename)
            return list(tokenize_string(line_text, lexer))

        return list(tokenize_string(line_text, plain_text_lexer))

    def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
        """Return the tokenized line ``line_number`` (1-based) of ``filenode``,
        tokenizing and caching the whole file on first access."""

        if filenode not in self.highlighted_filenodes:
            tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
            self.highlighted_filenodes[filenode] = tokenized_lines
        return self.highlighted_filenodes[filenode][line_number - 1]

    def action_to_op(self, action):
        """Map a parser action name to its single-char diff op marker;
        unknown actions are returned unchanged."""
        return {
            'add': '+',
            'del': '-',
            'unmod': ' ',
            'old-no-nl': ' ',
            'new-no-nl': ' ',
        }.get(action, action)

    def as_unified(self, lines):
        """
        Return a generator that yields the lines of a diff in unified order
        """
        def generator():
            buf = []
            for line in lines:

                # flush buffered additions when the run of changes ends
                # (parens make the original operator precedence explicit)
                if (buf and not line.original) or line.original.action == ' ':
                    for b in buf:
                        yield b
                    buf = []

                if line.original:
                    if line.original.action == ' ':
                        yield (line.original.lineno, line.modified.lineno,
                               line.original.action, line.original.content,
                               line.original.comments)
                        continue

                    if line.original.action == '-':
                        yield (line.original.lineno, None,
                               line.original.action, line.original.content,
                               line.original.comments)

                    if line.modified.action == '+':
                        # buffer additions so they follow the deletions
                        buf.append((
                            None, line.modified.lineno,
                            line.modified.action, line.modified.content,
                            line.modified.comments))
                        continue

                if line.modified:
                    yield (None, line.modified.lineno,
                           line.modified.action, line.modified.content,
                           line.modified.comments)

            # flush any trailing buffered additions
            for b in buf:
                yield b

        return generator()
@@ -1,1107 +1,1107 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import re
27 27 import collections
28 28 import difflib
29 29 import logging
30 30
31 31 from itertools import tee, imap
32 32
33 33 from rhodecode.lib.vcs.exceptions import VCSError
34 34 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
35 35 from rhodecode.lib.utils2 import safe_unicode
36 36
37 37 log = logging.getLogger(__name__)
38 38
39 39 # define max context, a file with more than this numbers of lines is unusable
40 40 # in browser anyway
41 41 MAX_CONTEXT = 1024 * 1014
42 42
43 43
44 44 class OPS(object):
45 45 ADD = 'A'
46 46 MOD = 'M'
47 47 DEL = 'D'
48 48
49 49
50 50 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
51 51 """
52 52 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
53 53
54 54 :param ignore_whitespace: ignore whitespaces in diff
55 55 """
56 56 # make sure we pass in default context
57 57 context = context or 3
58 58 # protect against IntOverflow when passing HUGE context
59 59 if context > MAX_CONTEXT:
60 60 context = MAX_CONTEXT
61 61
62 62 submodules = filter(lambda o: isinstance(o, SubModuleNode),
63 63 [filenode_new, filenode_old])
64 64 if submodules:
65 65 return ''
66 66
67 67 for filenode in (filenode_old, filenode_new):
68 68 if not isinstance(filenode, FileNode):
69 69 raise VCSError(
70 70 "Given object should be FileNode object, not %s"
71 71 % filenode.__class__)
72 72
73 73 repo = filenode_new.commit.repository
74 74 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
75 75 new_commit = filenode_new.commit
76 76
77 77 vcs_gitdiff = repo.get_diff(
78 78 old_commit, new_commit, filenode_new.path,
79 79 ignore_whitespace, context, path1=filenode_old.path)
80 80 return vcs_gitdiff
81 81
82 82 NEW_FILENODE = 1
83 83 DEL_FILENODE = 2
84 84 MOD_FILENODE = 3
85 85 RENAMED_FILENODE = 4
86 86 COPIED_FILENODE = 5
87 87 CHMOD_FILENODE = 6
88 88 BIN_FILENODE = 7
89 89
90 90
91 91 class LimitedDiffContainer(object):
92 92
93 93 def __init__(self, diff_limit, cur_diff_size, diff):
94 94 self.diff = diff
95 95 self.diff_limit = diff_limit
96 96 self.cur_diff_size = cur_diff_size
97 97
98 98 def __getitem__(self, key):
99 99 return self.diff.__getitem__(key)
100 100
101 101 def __iter__(self):
102 102 for l in self.diff:
103 103 yield l
104 104
105 105
106 106 class Action(object):
107 107 """
108 108 Contains constants for the action value of the lines in a parsed diff.
109 109 """
110 110
111 111 ADD = 'add'
112 112 DELETE = 'del'
113 113 UNMODIFIED = 'unmod'
114 114
115 115 CONTEXT = 'context'
116 116 OLD_NO_NL = 'old-no-nl'
117 117 NEW_NO_NL = 'new-no-nl'
118 118
119 119
120 120 class DiffProcessor(object):
121 121 """
122 122 Give it a unified or git diff and it returns a list of the files that were
123 123 mentioned in the diff together with a dict of meta information that
124 124 can be used to render it in a HTML template.
125 125
126 126 .. note:: Unicode handling
127 127
128 128 The original diffs are a byte sequence and can contain filenames
129 129 in mixed encodings. This class generally returns `unicode` objects
130 130 since the result is intended for presentation to the user.
131 131
132 132 """
133 133 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
134 134 _newline_marker = re.compile(r'^\\ No newline at end of file')
135 135
136 136 # used for inline highlighter word split
137 137 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
138 138
139 139 # collapse ranges of commits over given number
140 140 _collapse_commits_over = 5
141 141
142 142 def __init__(self, diff, format='gitdiff', diff_limit=None,
143 143 file_limit=None, show_full_diff=True):
144 144 """
145 145 :param diff: A `Diff` object representing a diff from a vcs backend
146 146 :param format: format of diff passed, `udiff` or `gitdiff`
147 147 :param diff_limit: define the size of diff that is considered "big"
148 148 based on that parameter cut off will be triggered, set to None
149 149 to show full diff
150 150 """
151 151 self._diff = diff
152 152 self._format = format
153 153 self.adds = 0
154 154 self.removes = 0
155 155 # calculate diff size
156 156 self.diff_limit = diff_limit
157 157 self.file_limit = file_limit
158 158 self.show_full_diff = show_full_diff
159 159 self.cur_diff_size = 0
160 160 self.parsed = False
161 161 self.parsed_diff = []
162 162
163 163 log.debug('Initialized DiffProcessor with %s mode', format)
164 164 if format == 'gitdiff':
165 165 self.differ = self._highlight_line_difflib
166 166 self._parser = self._parse_gitdiff
167 167 else:
168 168 self.differ = self._highlight_line_udiff
169 169 self._parser = self._new_parse_gitdiff
170 170
171 171 def _copy_iterator(self):
172 172 """
173 173 make a fresh copy of generator, we should not iterate thru
174 174 an original as it's needed for repeating operations on
175 175 this instance of DiffProcessor
176 176 """
177 177 self.__udiff, iterator_copy = tee(self.__udiff)
178 178 return iterator_copy
179 179
180 180 def _escaper(self, string):
181 181 """
182 182 Escaper for diff escapes special chars and checks the diff limit
183 183
184 184 :param string:
185 185 """
186 186
187 187 self.cur_diff_size += len(string)
188 188
189 189 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
190 190 raise DiffLimitExceeded('Diff Limit Exceeded')
191 191
192 192 return safe_unicode(string)\
193 193 .replace('&', '&amp;')\
194 194 .replace('<', '&lt;')\
195 195 .replace('>', '&gt;')
196 196
197 197 def _line_counter(self, l):
198 198 """
199 199 Checks each line and bumps total adds/removes for this diff
200 200
201 201 :param l:
202 202 """
203 203 if l.startswith('+') and not l.startswith('+++'):
204 204 self.adds += 1
205 205 elif l.startswith('-') and not l.startswith('---'):
206 206 self.removes += 1
207 207 return safe_unicode(l)
208 208
209 209 def _highlight_line_difflib(self, line, next_):
210 210 """
211 211 Highlight inline changes in both lines.
212 212 """
213 213
214 214 if line['action'] == Action.DELETE:
215 215 old, new = line, next_
216 216 else:
217 217 old, new = next_, line
218 218
219 219 oldwords = self._token_re.split(old['line'])
220 220 newwords = self._token_re.split(new['line'])
221 221 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
222 222
223 223 oldfragments, newfragments = [], []
224 224 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
225 225 oldfrag = ''.join(oldwords[i1:i2])
226 226 newfrag = ''.join(newwords[j1:j2])
227 227 if tag != 'equal':
228 228 if oldfrag:
229 229 oldfrag = '<del>%s</del>' % oldfrag
230 230 if newfrag:
231 231 newfrag = '<ins>%s</ins>' % newfrag
232 232 oldfragments.append(oldfrag)
233 233 newfragments.append(newfrag)
234 234
235 235 old['line'] = "".join(oldfragments)
236 236 new['line'] = "".join(newfragments)
237 237
238 238 def _highlight_line_udiff(self, line, next_):
239 239 """
240 240 Highlight inline changes in both lines.
241 241 """
242 242 start = 0
243 243 limit = min(len(line['line']), len(next_['line']))
244 244 while start < limit and line['line'][start] == next_['line'][start]:
245 245 start += 1
246 246 end = -1
247 247 limit -= start
248 248 while -end <= limit and line['line'][end] == next_['line'][end]:
249 249 end -= 1
250 250 end += 1
251 251 if start or end:
252 252 def do(l):
253 253 last = end + len(l['line'])
254 254 if l['action'] == Action.ADD:
255 255 tag = 'ins'
256 256 else:
257 257 tag = 'del'
258 258 l['line'] = '%s<%s>%s</%s>%s' % (
259 259 l['line'][:start],
260 260 tag,
261 261 l['line'][start:last],
262 262 tag,
263 263 l['line'][last:]
264 264 )
265 265 do(line)
266 266 do(next_)
267 267
268 268 def _clean_line(self, line, command):
269 269 if command in ['+', '-', ' ']:
270 270 # only modify the line if it's actually a diff thing
271 271 line = line[1:]
272 272 return line
273 273
274 274 def _parse_gitdiff(self, inline_diff=True):
275 275 _files = []
276 276 diff_container = lambda arg: arg
277 277
278 278 for chunk in self._diff.chunks():
279 279 head = chunk.header
280 280
281 281 diff = imap(self._escaper, chunk.diff.splitlines(1))
282 282 raw_diff = chunk.raw
283 283 limited_diff = False
284 284 exceeds_limit = False
285 285
286 286 op = None
287 287 stats = {
288 288 'added': 0,
289 289 'deleted': 0,
290 290 'binary': False,
291 291 'ops': {},
292 292 }
293 293
294 294 if head['deleted_file_mode']:
295 295 op = OPS.DEL
296 296 stats['binary'] = True
297 297 stats['ops'][DEL_FILENODE] = 'deleted file'
298 298
299 299 elif head['new_file_mode']:
300 300 op = OPS.ADD
301 301 stats['binary'] = True
302 302 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
303 303 else: # modify operation, can be copy, rename or chmod
304 304
305 305 # CHMOD
306 306 if head['new_mode'] and head['old_mode']:
307 307 op = OPS.MOD
308 308 stats['binary'] = True
309 309 stats['ops'][CHMOD_FILENODE] = (
310 310 'modified file chmod %s => %s' % (
311 311 head['old_mode'], head['new_mode']))
312 312 # RENAME
313 313 if head['rename_from'] != head['rename_to']:
314 314 op = OPS.MOD
315 315 stats['binary'] = True
316 316 stats['ops'][RENAMED_FILENODE] = (
317 317 'file renamed from %s to %s' % (
318 318 head['rename_from'], head['rename_to']))
319 319 # COPY
320 320 if head.get('copy_from') and head.get('copy_to'):
321 321 op = OPS.MOD
322 322 stats['binary'] = True
323 323 stats['ops'][COPIED_FILENODE] = (
324 324 'file copied from %s to %s' % (
325 325 head['copy_from'], head['copy_to']))
326 326
327 327 # If our new parsed headers didn't match anything fallback to
328 328 # old style detection
329 329 if op is None:
330 330 if not head['a_file'] and head['b_file']:
331 331 op = OPS.ADD
332 332 stats['binary'] = True
333 333 stats['ops'][NEW_FILENODE] = 'new file'
334 334
335 335 elif head['a_file'] and not head['b_file']:
336 336 op = OPS.DEL
337 337 stats['binary'] = True
338 338 stats['ops'][DEL_FILENODE] = 'deleted file'
339 339
340 340 # it's not ADD not DELETE
341 341 if op is None:
342 342 op = OPS.MOD
343 343 stats['binary'] = True
344 344 stats['ops'][MOD_FILENODE] = 'modified file'
345 345
346 346 # a real non-binary diff
347 347 if head['a_file'] or head['b_file']:
348 348 try:
349 349 raw_diff, chunks, _stats = self._parse_lines(diff)
350 350 stats['binary'] = False
351 351 stats['added'] = _stats[0]
352 352 stats['deleted'] = _stats[1]
353 353 # explicit mark that it's a modified file
354 354 if op == OPS.MOD:
355 355 stats['ops'][MOD_FILENODE] = 'modified file'
356 356 exceeds_limit = len(raw_diff) > self.file_limit
357 357
358 358 # changed from _escaper function so we validate size of
359 359 # each file instead of the whole diff
360 360 # diff will hide big files but still show small ones
361 361 # from my tests, big files are fairly safe to be parsed
362 362 # but the browser is the bottleneck
363 363 if not self.show_full_diff and exceeds_limit:
364 364 raise DiffLimitExceeded('File Limit Exceeded')
365 365
366 366 except DiffLimitExceeded:
367 367 diff_container = lambda _diff: \
368 368 LimitedDiffContainer(
369 369 self.diff_limit, self.cur_diff_size, _diff)
370 370
371 371 exceeds_limit = len(raw_diff) > self.file_limit
372 372 limited_diff = True
373 373 chunks = []
374 374
375 375 else: # GIT format binary patch, or possibly empty diff
376 376 if head['bin_patch']:
377 377 # we have operation already extracted, but we mark simply
378 378 # it's a diff we wont show for binary files
379 379 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
380 380 chunks = []
381 381
382 382 if chunks and not self.show_full_diff and op == OPS.DEL:
383 383 # if not full diff mode show deleted file contents
384 384 # TODO: anderson: if the view is not too big, there is no way
385 385 # to see the content of the file
386 386 chunks = []
387 387
388 388 chunks.insert(0, [{
389 389 'old_lineno': '',
390 390 'new_lineno': '',
391 391 'action': Action.CONTEXT,
392 392 'line': msg,
393 393 } for _op, msg in stats['ops'].iteritems()
394 394 if _op not in [MOD_FILENODE]])
395 395
396 396 _files.append({
397 397 'filename': safe_unicode(head['b_path']),
398 398 'old_revision': head['a_blob_id'],
399 399 'new_revision': head['b_blob_id'],
400 400 'chunks': chunks,
401 401 'raw_diff': safe_unicode(raw_diff),
402 402 'operation': op,
403 403 'stats': stats,
404 404 'exceeds_limit': exceeds_limit,
405 405 'is_limited_diff': limited_diff,
406 406 })
407 407
408 408 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
409 409 OPS.DEL: 2}.get(info['operation'])
410 410
411 411 if not inline_diff:
412 412 return diff_container(sorted(_files, key=sorter))
413 413
414 414 # highlight inline changes
415 415 for diff_data in _files:
416 416 for chunk in diff_data['chunks']:
417 417 lineiter = iter(chunk)
418 418 try:
419 419 while 1:
420 420 line = lineiter.next()
421 421 if line['action'] not in (
422 422 Action.UNMODIFIED, Action.CONTEXT):
423 423 nextline = lineiter.next()
424 424 if nextline['action'] in ['unmod', 'context'] or \
425 425 nextline['action'] == line['action']:
426 426 continue
427 427 self.differ(line, nextline)
428 428 except StopIteration:
429 429 pass
430 430
431 431 return diff_container(sorted(_files, key=sorter))
432 432
433 433 def _check_large_diff(self):
434 434 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
435 435 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
436 436 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
437 437
438 438 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
439 439 def _new_parse_gitdiff(self, inline_diff=True):
440 440 _files = []
441 441
442 442 # this can be overriden later to a LimitedDiffContainer type
443 443 diff_container = lambda arg: arg
444 444
445 445 for chunk in self._diff.chunks():
446 446 head = chunk.header
447 447 log.debug('parsing diff %r' % head)
448 448
449 449 raw_diff = chunk.raw
450 450 limited_diff = False
451 451 exceeds_limit = False
452 452
453 453 op = None
454 454 stats = {
455 455 'added': 0,
456 456 'deleted': 0,
457 457 'binary': False,
458 458 'old_mode': None,
459 459 'new_mode': None,
460 460 'ops': {},
461 461 }
462 462 if head['old_mode']:
463 463 stats['old_mode'] = head['old_mode']
464 464 if head['new_mode']:
465 465 stats['new_mode'] = head['new_mode']
466 466 if head['b_mode']:
467 467 stats['new_mode'] = head['b_mode']
468 468
469 469 # delete file
470 470 if head['deleted_file_mode']:
471 471 op = OPS.DEL
472 472 stats['binary'] = True
473 473 stats['ops'][DEL_FILENODE] = 'deleted file'
474 474
475 475 # new file
476 476 elif head['new_file_mode']:
477 477 op = OPS.ADD
478 478 stats['binary'] = True
479 479 stats['old_mode'] = None
480 480 stats['new_mode'] = head['new_file_mode']
481 481 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
482 482
483 483 # modify operation, can be copy, rename or chmod
484 484 else:
485 485 # CHMOD
486 486 if head['new_mode'] and head['old_mode']:
487 487 op = OPS.MOD
488 488 stats['binary'] = True
489 489 stats['ops'][CHMOD_FILENODE] = (
490 490 'modified file chmod %s => %s' % (
491 491 head['old_mode'], head['new_mode']))
492 492
493 493 # RENAME
494 494 if head['rename_from'] != head['rename_to']:
495 495 op = OPS.MOD
496 496 stats['binary'] = True
497 497 stats['renamed'] = (head['rename_from'], head['rename_to'])
498 498 stats['ops'][RENAMED_FILENODE] = (
499 499 'file renamed from %s to %s' % (
500 500 head['rename_from'], head['rename_to']))
501 501 # COPY
502 502 if head.get('copy_from') and head.get('copy_to'):
503 503 op = OPS.MOD
504 504 stats['binary'] = True
505 505 stats['copied'] = (head['copy_from'], head['copy_to'])
506 506 stats['ops'][COPIED_FILENODE] = (
507 507 'file copied from %s to %s' % (
508 508 head['copy_from'], head['copy_to']))
509 509
510 510 # If our new parsed headers didn't match anything fallback to
511 511 # old style detection
512 512 if op is None:
513 513 if not head['a_file'] and head['b_file']:
514 514 op = OPS.ADD
515 515 stats['binary'] = True
516 516 stats['new_file'] = True
517 517 stats['ops'][NEW_FILENODE] = 'new file'
518 518
519 519 elif head['a_file'] and not head['b_file']:
520 520 op = OPS.DEL
521 521 stats['binary'] = True
522 522 stats['ops'][DEL_FILENODE] = 'deleted file'
523 523
524 524 # it's not ADD not DELETE
525 525 if op is None:
526 526 op = OPS.MOD
527 527 stats['binary'] = True
528 528 stats['ops'][MOD_FILENODE] = 'modified file'
529 529
530 530 # a real non-binary diff
531 531 if head['a_file'] or head['b_file']:
532 532 diff = iter(chunk.diff.splitlines(1))
533 533
534 534 # append each file to the diff size
535 535 raw_chunk_size = len(raw_diff)
536 536
537 537 exceeds_limit = raw_chunk_size > self.file_limit
538 538 self.cur_diff_size += raw_chunk_size
539 539
540 540 try:
541 541 # Check each file instead of the whole diff.
542 542 # Diff will hide big files but still show small ones.
543 543 # From the tests big files are fairly safe to be parsed
544 544 # but the browser is the bottleneck.
545 545 if not self.show_full_diff and exceeds_limit:
546 546 log.debug('File `%s` exceeds current file_limit of %s',
547 547 safe_unicode(head['b_path']), self.file_limit)
548 548 raise DiffLimitExceeded(
549 549 'File Limit %s Exceeded', self.file_limit)
550 550
551 551 self._check_large_diff()
552 552
553 553 raw_diff, chunks, _stats = self._new_parse_lines(diff)
554 554 stats['binary'] = False
555 555 stats['added'] = _stats[0]
556 556 stats['deleted'] = _stats[1]
557 557 # explicit mark that it's a modified file
558 558 if op == OPS.MOD:
559 559 stats['ops'][MOD_FILENODE] = 'modified file'
560 560
561 561 except DiffLimitExceeded:
562 562 diff_container = lambda _diff: \
563 563 LimitedDiffContainer(
564 564 self.diff_limit, self.cur_diff_size, _diff)
565 565
566 566 limited_diff = True
567 567 chunks = []
568 568
569 569 else: # GIT format binary patch, or possibly empty diff
570 570 if head['bin_patch']:
571 571 # we have operation already extracted, but we mark simply
572 572 # it's a diff we wont show for binary files
573 573 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
574 574 chunks = []
575 575
576 576 # Hide content of deleted node by setting empty chunks
577 577 if chunks and not self.show_full_diff and op == OPS.DEL:
578 578 # if not full diff mode show deleted file contents
579 579 # TODO: anderson: if the view is not too big, there is no way
580 580 # to see the content of the file
581 581 chunks = []
582 582
583 583 chunks.insert(
584 584 0, [{'old_lineno': '',
585 585 'new_lineno': '',
586 586 'action': Action.CONTEXT,
587 587 'line': msg,
588 588 } for _op, msg in stats['ops'].iteritems()
589 589 if _op not in [MOD_FILENODE]])
590 590
591 591 original_filename = safe_unicode(head['a_path'])
592 592 _files.append({
593 593 'original_filename': original_filename,
594 594 'filename': safe_unicode(head['b_path']),
595 595 'old_revision': head['a_blob_id'],
596 596 'new_revision': head['b_blob_id'],
597 597 'chunks': chunks,
598 598 'raw_diff': safe_unicode(raw_diff),
599 599 'operation': op,
600 600 'stats': stats,
601 601 'exceeds_limit': exceeds_limit,
602 602 'is_limited_diff': limited_diff,
603 603 })
604 604
605 605 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
606 606 OPS.DEL: 2}.get(info['operation'])
607 607
608 608 return diff_container(sorted(_files, key=sorter))
609 609
610 610 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
611 611 def _parse_lines(self, diff):
612 612 """
613 613 Parse the diff an return data for the template.
614 614 """
615 615
616 616 lineiter = iter(diff)
617 617 stats = [0, 0]
618 618 chunks = []
619 619 raw_diff = []
620 620
621 621 try:
622 622 line = lineiter.next()
623 623
624 624 while line:
625 625 raw_diff.append(line)
626 626 lines = []
627 627 chunks.append(lines)
628 628
629 629 match = self._chunk_re.match(line)
630 630
631 631 if not match:
632 632 break
633 633
634 634 gr = match.groups()
635 635 (old_line, old_end,
636 636 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
637 637 old_line -= 1
638 638 new_line -= 1
639 639
640 640 context = len(gr) == 5
641 641 old_end += old_line
642 642 new_end += new_line
643 643
644 644 if context:
645 645 # skip context only if it's first line
646 646 if int(gr[0]) > 1:
647 647 lines.append({
648 648 'old_lineno': '...',
649 649 'new_lineno': '...',
650 650 'action': Action.CONTEXT,
651 651 'line': line,
652 652 })
653 653
654 654 line = lineiter.next()
655 655
656 656 while old_line < old_end or new_line < new_end:
657 657 command = ' '
658 658 if line:
659 659 command = line[0]
660 660
661 661 affects_old = affects_new = False
662 662
663 663 # ignore those if we don't expect them
664 664 if command in '#@':
665 665 continue
666 666 elif command == '+':
667 667 affects_new = True
668 668 action = Action.ADD
669 669 stats[0] += 1
670 670 elif command == '-':
671 671 affects_old = True
672 672 action = Action.DELETE
673 673 stats[1] += 1
674 674 else:
675 675 affects_old = affects_new = True
676 676 action = Action.UNMODIFIED
677 677
678 678 if not self._newline_marker.match(line):
679 679 old_line += affects_old
680 680 new_line += affects_new
681 681 lines.append({
682 682 'old_lineno': affects_old and old_line or '',
683 683 'new_lineno': affects_new and new_line or '',
684 684 'action': action,
685 685 'line': self._clean_line(line, command)
686 686 })
687 687 raw_diff.append(line)
688 688
689 689 line = lineiter.next()
690 690
691 691 if self._newline_marker.match(line):
692 692 # we need to append to lines, since this is not
693 693 # counted in the line specs of diff
694 694 lines.append({
695 695 'old_lineno': '...',
696 696 'new_lineno': '...',
697 697 'action': Action.CONTEXT,
698 698 'line': self._clean_line(line, command)
699 699 })
700 700
701 701 except StopIteration:
702 702 pass
703 703 return ''.join(raw_diff), chunks, stats
704 704
705 705 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
706 706 def _new_parse_lines(self, diff_iter):
707 707 """
708 708 Parse the diff an return data for the template.
709 709 """
710 710
711 711 stats = [0, 0]
712 712 chunks = []
713 713 raw_diff = []
714 714
715 715 diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
716 716
717 717 try:
718 718 line = diff_iter.next()
719 719
720 720 while line:
721 721 raw_diff.append(line)
722 722 match = self._chunk_re.match(line)
723 723
724 724 if not match:
725 725 break
726 726
727 727 gr = match.groups()
728 728 (old_line, old_end,
729 729 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
730 730
731 731 lines = []
732 732 hunk = {
733 733 'section_header': gr[-1],
734 734 'source_start': old_line,
735 735 'source_length': old_end,
736 736 'target_start': new_line,
737 737 'target_length': new_end,
738 738 'lines': lines,
739 739 }
740 740 chunks.append(hunk)
741 741
742 742 old_line -= 1
743 743 new_line -= 1
744 744
745 745 context = len(gr) == 5
746 746 old_end += old_line
747 747 new_end += new_line
748 748
749 749 line = diff_iter.next()
750 750
751 751 while old_line < old_end or new_line < new_end:
752 752 command = ' '
753 753 if line:
754 754 command = line[0]
755 755
756 756 affects_old = affects_new = False
757 757
758 758 # ignore those if we don't expect them
759 759 if command in '#@':
760 760 continue
761 761 elif command == '+':
762 762 affects_new = True
763 763 action = Action.ADD
764 764 stats[0] += 1
765 765 elif command == '-':
766 766 affects_old = True
767 767 action = Action.DELETE
768 768 stats[1] += 1
769 769 else:
770 770 affects_old = affects_new = True
771 771 action = Action.UNMODIFIED
772 772
773 773 if not self._newline_marker.match(line):
774 774 old_line += affects_old
775 775 new_line += affects_new
776 776 lines.append({
777 777 'old_lineno': affects_old and old_line or '',
778 778 'new_lineno': affects_new and new_line or '',
779 779 'action': action,
780 780 'line': self._clean_line(line, command)
781 781 })
782 raw_diff.append(line)
782 raw_diff.append(line)
783 783
784 784 line = diff_iter.next()
785 785
786 786 if self._newline_marker.match(line):
787 787 # we need to append to lines, since this is not
788 788 # counted in the line specs of diff
789 789 if affects_old:
790 790 action = Action.OLD_NO_NL
791 791 elif affects_new:
792 792 action = Action.NEW_NO_NL
793 793 else:
794 794 raise Exception('invalid context for no newline')
795 795
796 796 lines.append({
797 797 'old_lineno': None,
798 798 'new_lineno': None,
799 799 'action': action,
800 800 'line': self._clean_line(line, command)
801 801 })
802 802
803 803 except StopIteration:
804 804 pass
805 805
806 806 return ''.join(raw_diff), chunks, stats
807 807
808 808 def _safe_id(self, idstring):
809 809 """Make a string safe for including in an id attribute.
810 810
811 811 The HTML spec says that id attributes 'must begin with
812 812 a letter ([A-Za-z]) and may be followed by any number
813 813 of letters, digits ([0-9]), hyphens ("-"), underscores
814 814 ("_"), colons (":"), and periods (".")'. These regexps
815 815 are slightly over-zealous, in that they remove colons
816 816 and periods unnecessarily.
817 817
818 818 Whitespace is transformed into underscores, and then
819 819 anything which is not a hyphen or a character that
820 820 matches \w (alphanumerics and underscore) is removed.
821 821
822 822 """
823 823 # Transform all whitespace to underscore
824 824 idstring = re.sub(r'\s', "_", '%s' % idstring)
825 825 # Remove everything that is not a hyphen or a member of \w
826 826 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 827 return idstring
828 828
829 829 def prepare(self, inline_diff=True):
830 830 """
831 831 Prepare the passed udiff for HTML rendering.
832 832
833 833 :return: A list of dicts with diff information.
834 834 """
835 835 parsed = self._parser(inline_diff=inline_diff)
836 836 self.parsed = True
837 837 self.parsed_diff = parsed
838 838 return parsed
839 839
840 840 def as_raw(self, diff_lines=None):
841 841 """
842 842 Returns raw diff as a byte string
843 843 """
844 844 return self._diff.raw
845 845
846 846 def as_html(self, table_class='code-difftable', line_class='line',
847 847 old_lineno_class='lineno old', new_lineno_class='lineno new',
848 848 code_class='code', enable_comments=False, parsed_lines=None):
849 849 """
850 850 Return given diff as html table with customized css classes
851 851 """
852 852 # TODO(marcink): not sure how to pass in translator
853 853 # here in an efficient way, leave the _ for proper gettext extraction
854 854 _ = lambda s: s
855 855
856 856 def _link_to_if(condition, label, url):
857 857 """
858 858 Generates a link if condition is meet or just the label if not.
859 859 """
860 860
861 861 if condition:
862 862 return '''<a href="%(url)s" class="tooltip"
863 863 title="%(title)s">%(label)s</a>''' % {
864 864 'title': _('Click to select line'),
865 865 'url': url,
866 866 'label': label
867 867 }
868 868 else:
869 869 return label
870 870 if not self.parsed:
871 871 self.prepare()
872 872
873 873 diff_lines = self.parsed_diff
874 874 if parsed_lines:
875 875 diff_lines = parsed_lines
876 876
877 877 _html_empty = True
878 878 _html = []
879 879 _html.append('''<table class="%(table_class)s">\n''' % {
880 880 'table_class': table_class
881 881 })
882 882
883 883 for diff in diff_lines:
884 884 for line in diff['chunks']:
885 885 _html_empty = False
886 886 for change in line:
887 887 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
888 888 'lc': line_class,
889 889 'action': change['action']
890 890 })
891 891 anchor_old_id = ''
892 892 anchor_new_id = ''
893 893 anchor_old = "%(filename)s_o%(oldline_no)s" % {
894 894 'filename': self._safe_id(diff['filename']),
895 895 'oldline_no': change['old_lineno']
896 896 }
897 897 anchor_new = "%(filename)s_n%(oldline_no)s" % {
898 898 'filename': self._safe_id(diff['filename']),
899 899 'oldline_no': change['new_lineno']
900 900 }
901 901 cond_old = (change['old_lineno'] != '...' and
902 902 change['old_lineno'])
903 903 cond_new = (change['new_lineno'] != '...' and
904 904 change['new_lineno'])
905 905 if cond_old:
906 906 anchor_old_id = 'id="%s"' % anchor_old
907 907 if cond_new:
908 908 anchor_new_id = 'id="%s"' % anchor_new
909 909
910 910 if change['action'] != Action.CONTEXT:
911 911 anchor_link = True
912 912 else:
913 913 anchor_link = False
914 914
915 915 ###########################################################
916 916 # COMMENT ICONS
917 917 ###########################################################
918 918 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
919 919
920 920 if enable_comments and change['action'] != Action.CONTEXT:
921 921 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
922 922
923 923 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
924 924
925 925 ###########################################################
926 926 # OLD LINE NUMBER
927 927 ###########################################################
928 928 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
929 929 'a_id': anchor_old_id,
930 930 'olc': old_lineno_class
931 931 })
932 932
933 933 _html.append('''%(link)s''' % {
934 934 'link': _link_to_if(anchor_link, change['old_lineno'],
935 935 '#%s' % anchor_old)
936 936 })
937 937 _html.append('''</td>\n''')
938 938 ###########################################################
939 939 # NEW LINE NUMBER
940 940 ###########################################################
941 941
942 942 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
943 943 'a_id': anchor_new_id,
944 944 'nlc': new_lineno_class
945 945 })
946 946
947 947 _html.append('''%(link)s''' % {
948 948 'link': _link_to_if(anchor_link, change['new_lineno'],
949 949 '#%s' % anchor_new)
950 950 })
951 951 _html.append('''</td>\n''')
952 952 ###########################################################
953 953 # CODE
954 954 ###########################################################
955 955 code_classes = [code_class]
956 956 if (not enable_comments or
957 957 change['action'] == Action.CONTEXT):
958 958 code_classes.append('no-comment')
959 959 _html.append('\t<td class="%s">' % ' '.join(code_classes))
960 960 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
961 961 'code': change['line']
962 962 })
963 963
964 964 _html.append('''\t</td>''')
965 965 _html.append('''\n</tr>\n''')
966 966 _html.append('''</table>''')
967 967 if _html_empty:
968 968 return None
969 969 return ''.join(_html)
970 970
971 971 def stat(self):
972 972 """
973 973 Returns tuple of added, and removed lines for this instance
974 974 """
975 975 return self.adds, self.removes
976 976
977 977 def get_context_of_line(
978 978 self, path, diff_line=None, context_before=3, context_after=3):
979 979 """
980 980 Returns the context lines for the specified diff line.
981 981
982 982 :type diff_line: :class:`DiffLineNumber`
983 983 """
984 984 assert self.parsed, "DiffProcessor is not initialized."
985 985
986 986 if None not in diff_line:
987 987 raise ValueError(
988 988 "Cannot specify both line numbers: {}".format(diff_line))
989 989
990 990 file_diff = self._get_file_diff(path)
991 991 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
992 992
993 993 first_line_to_include = max(idx - context_before, 0)
994 994 first_line_after_context = idx + context_after + 1
995 995 context_lines = chunk[first_line_to_include:first_line_after_context]
996 996
997 997 line_contents = [
998 998 _context_line(line) for line in context_lines
999 999 if _is_diff_content(line)]
1000 1000 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1001 1001 # Once they are fixed, we can drop this line here.
1002 1002 if line_contents:
1003 1003 line_contents[-1] = (
1004 1004 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1005 1005 return line_contents
1006 1006
1007 1007 def find_context(self, path, context, offset=0):
1008 1008 """
1009 1009 Finds the given `context` inside of the diff.
1010 1010
1011 1011 Use the parameter `offset` to specify which offset the target line has
1012 1012 inside of the given `context`. This way the correct diff line will be
1013 1013 returned.
1014 1014
1015 1015 :param offset: Shall be used to specify the offset of the main line
1016 1016 within the given `context`.
1017 1017 """
1018 1018 if offset < 0 or offset >= len(context):
1019 1019 raise ValueError(
1020 1020 "Only positive values up to the length of the context "
1021 1021 "minus one are allowed.")
1022 1022
1023 1023 matches = []
1024 1024 file_diff = self._get_file_diff(path)
1025 1025
1026 1026 for chunk in file_diff['chunks']:
1027 1027 context_iter = iter(context)
1028 1028 for line_idx, line in enumerate(chunk):
1029 1029 try:
1030 1030 if _context_line(line) == context_iter.next():
1031 1031 continue
1032 1032 except StopIteration:
1033 1033 matches.append((line_idx, chunk))
1034 1034 context_iter = iter(context)
1035 1035
1036 1036 # Increment position and triger StopIteration
1037 1037 # if we had a match at the end
1038 1038 line_idx += 1
1039 1039 try:
1040 1040 context_iter.next()
1041 1041 except StopIteration:
1042 1042 matches.append((line_idx, chunk))
1043 1043
1044 1044 effective_offset = len(context) - offset
1045 1045 found_at_diff_lines = [
1046 1046 _line_to_diff_line_number(chunk[idx - effective_offset])
1047 1047 for idx, chunk in matches]
1048 1048
1049 1049 return found_at_diff_lines
1050 1050
1051 1051 def _get_file_diff(self, path):
1052 1052 for file_diff in self.parsed_diff:
1053 1053 if file_diff['filename'] == path:
1054 1054 break
1055 1055 else:
1056 1056 raise FileNotInDiffException("File {} not in diff".format(path))
1057 1057 return file_diff
1058 1058
1059 1059 def _find_chunk_line_index(self, file_diff, diff_line):
1060 1060 for chunk in file_diff['chunks']:
1061 1061 for idx, line in enumerate(chunk):
1062 1062 if line['old_lineno'] == diff_line.old:
1063 1063 return chunk, idx
1064 1064 if line['new_lineno'] == diff_line.new:
1065 1065 return chunk, idx
1066 1066 raise LineNotInDiffException(
1067 1067 "The line {} is not part of the diff.".format(diff_line))
1068 1068
1069 1069
1070 1070 def _is_diff_content(line):
1071 1071 return line['action'] in (
1072 1072 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1073 1073
1074 1074
1075 1075 def _context_line(line):
1076 1076 return (line['action'], line['line'])
1077 1077
1078 1078
1079 1079 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1080 1080
1081 1081
1082 1082 def _line_to_diff_line_number(line):
1083 1083 new_line_no = line['new_lineno'] or None
1084 1084 old_line_no = line['old_lineno'] or None
1085 1085 return DiffLineNumber(old=old_line_no, new=new_line_no)
1086 1086
1087 1087
1088 1088 class FileNotInDiffException(Exception):
1089 1089 """
1090 1090 Raised when the context for a missing file is requested.
1091 1091
1092 1092 If you request the context for a line in a file which is not part of the
1093 1093 given diff, then this exception is raised.
1094 1094 """
1095 1095
1096 1096
1097 1097 class LineNotInDiffException(Exception):
1098 1098 """
1099 1099 Raised when the context for a missing line is requested.
1100 1100
1101 1101 If you request the context for a line in a file and this line is not
1102 1102 part of the given diff, then this exception is raised.
1103 1103 """
1104 1104
1105 1105
1106 1106 class DiffLimitExceeded(Exception):
1107 1107 pass
@@ -1,805 +1,813 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import textwrap
22 22
23 23 import pytest
24 24
25 25 from rhodecode.lib.diffs import (
26 26 DiffProcessor,
27 27 NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
28 28 CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
29 29 from rhodecode.tests.fixture import Fixture
30 30 from rhodecode.lib.vcs.backends.git.repository import GitDiff
31 31 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
32 32 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
33 33
34 34 fixture = Fixture()
35 35
36 36
37 37 def test_diffprocessor_as_html_with_comments():
38 38 raw_diff = textwrap.dedent('''
39 39 diff --git a/setup.py b/setup.py
40 40 index 5b36422..cfd698e 100755
41 41 --- a/setup.py
42 42 +++ b/setup.py
43 43 @@ -2,7 +2,7 @@
44 44 #!/usr/bin/python
45 45 # Setup file for X
46 46 # Copyright (C) No one
47 47 -
48 48 +x
49 49 try:
50 50 from setuptools import setup, Extension
51 51 except ImportError:
52 52 ''')
53 53 diff = GitDiff(raw_diff)
54 54 processor = DiffProcessor(diff)
55 55 processor.prepare()
56 56
57 57 # Note that the cell with the context in line 5 (in the html) has the
58 58 # no-comment class, which will prevent the add comment icon to be displayed.
59 59 expected_html = textwrap.dedent('''
60 60 <table class="code-difftable">
61 61 <tr class="line context">
62 62 <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
63 63 <td class="lineno old">...</td>
64 64 <td class="lineno new">...</td>
65 65 <td class="code no-comment">
66 66 <pre>@@ -2,7 +2,7 @@
67 67 </pre>
68 68 </td>
69 69 </tr>
70 70 <tr class="line unmod">
71 71 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
72 72 <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
73 73 title="Click to select line">2</a></td>
74 74 <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
75 75 title="Click to select line">2</a></td>
76 76 <td class="code">
77 77 <pre>#!/usr/bin/python
78 78 </pre>
79 79 </td>
80 80 </tr>
81 81 <tr class="line unmod">
82 82 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
83 83 <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
84 84 title="Click to select line">3</a></td>
85 85 <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
86 86 title="Click to select line">3</a></td>
87 87 <td class="code">
88 88 <pre># Setup file for X
89 89 </pre>
90 90 </td>
91 91 </tr>
92 92 <tr class="line unmod">
93 93 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
94 94 <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
95 95 title="Click to select line">4</a></td>
96 96 <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
97 97 title="Click to select line">4</a></td>
98 98 <td class="code">
99 99 <pre># Copyright (C) No one
100 100 </pre>
101 101 </td>
102 102 </tr>
103 103 <tr class="line del">
104 104 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
105 105 <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
106 106 title="Click to select line">5</a></td>
107 107 <td class="lineno new"><a href="#setuppy_n" class="tooltip"
108 108 title="Click to select line"></a></td>
109 109 <td class="code">
110 110 <pre>
111 111 </pre>
112 112 </td>
113 113 </tr>
114 114 <tr class="line add">
115 115 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
116 116 <td class="lineno old"><a href="#setuppy_o" class="tooltip"
117 117 title="Click to select line"></a></td>
118 118 <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
119 119 title="Click to select line">5</a></td>
120 120 <td class="code">
121 121 <pre><ins>x</ins>
122 122 </pre>
123 123 </td>
124 124 </tr>
125 125 <tr class="line unmod">
126 126 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
127 127 <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
128 128 title="Click to select line">6</a></td>
129 129 <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
130 130 title="Click to select line">6</a></td>
131 131 <td class="code">
132 132 <pre>try:
133 133 </pre>
134 134 </td>
135 135 </tr>
136 136 <tr class="line unmod">
137 137 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
138 138 <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
139 139 title="Click to select line">7</a></td>
140 140 <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
141 141 title="Click to select line">7</a></td>
142 142 <td class="code">
143 143 <pre> from setuptools import setup, Extension
144 144 </pre>
145 145 </td>
146 146 </tr>
147 147 <tr class="line unmod">
148 148 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
149 149 <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
150 150 title="Click to select line">8</a></td>
151 151 <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
152 152 title="Click to select line">8</a></td>
153 153 <td class="code">
154 154 <pre>except ImportError:
155 155 </pre>
156 156 </td>
157 157 </tr>
158 158 </table>
159 159 ''').strip()
160 160 html = processor.as_html(enable_comments=True).replace('\t', ' ')
161 161
162 162 assert html == expected_html
163 163
164 164
165 165 class TestMixedFilenameEncodings:
166 166
167 167 @pytest.fixture(scope="class")
168 168 def raw_diff(self):
169 169 return fixture.load_resource(
170 170 'hg_diff_mixed_filename_encodings.diff')
171 171
172 172 @pytest.fixture
173 173 def processor(self, raw_diff):
174 174 diff = MercurialDiff(raw_diff)
175 175 processor = DiffProcessor(diff)
176 176 return processor
177 177
178 178 def test_filenames_are_decoded_to_unicode(self, processor):
179 179 diff_data = processor.prepare()
180 180 filenames = [item['filename'] for item in diff_data]
181 181 assert filenames == [
182 182 u'spΓ€cial-utf8.txt', u'spοΏ½cial-cp1252.txt', u'spοΏ½cial-latin1.txt']
183 183
184 184 def test_raw_diff_is_decoded_to_unicode(self, processor):
185 185 diff_data = processor.prepare()
186 186 raw_diffs = [item['raw_diff'] for item in diff_data]
187 187 new_file_message = u'\nnew file mode 100644\n'
188 188 expected_raw_diffs = [
189 189 u' a/spΓ€cial-utf8.txt b/spΓ€cial-utf8.txt' + new_file_message,
190 190 u' a/spοΏ½cial-cp1252.txt b/spοΏ½cial-cp1252.txt' + new_file_message,
191 191 u' a/spοΏ½cial-latin1.txt b/spοΏ½cial-latin1.txt' + new_file_message]
192 192 assert raw_diffs == expected_raw_diffs
193 193
194 194 def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
195 195 assert processor.as_raw() == raw_diff
196 196
197 197
198 198 # TODO: mikhail: format the following data structure properly
199 199 DIFF_FIXTURES = [
200 200 ('hg',
201 201 'hg_diff_add_single_binary_file.diff',
202 202 [('US Warszawa.jpg', 'A',
203 203 {'added': 0,
204 204 'deleted': 0,
205 205 'binary': True,
206 206 'ops': {NEW_FILENODE: 'new file 100755',
207 207 BIN_FILENODE: 'binary diff hidden'}}),
208 208 ]),
209 209 ('hg',
210 210 'hg_diff_mod_single_binary_file.diff',
211 211 [('US Warszawa.jpg', 'M',
212 212 {'added': 0,
213 213 'deleted': 0,
214 214 'binary': True,
215 215 'ops': {MOD_FILENODE: 'modified file',
216 216 BIN_FILENODE: 'binary diff hidden'}}),
217 217 ]),
218 218 ('hg',
219 219 'hg_diff_mod_single_file_and_rename_and_chmod.diff',
220 220 [('README', 'M',
221 221 {'added': 3,
222 222 'deleted': 0,
223 223 'binary': False,
224 224 'ops': {MOD_FILENODE: 'modified file',
225 225 RENAMED_FILENODE: 'file renamed from README.rst to README',
226 226 CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
227 227 ]),
228 228 ('hg',
229 'hg_diff_no_newline.diff',
230 [('server.properties', 'M',
231 {'added': 2,
232 'deleted': 1,
233 'binary': False,
234 'ops': {MOD_FILENODE: 'modified file'}}),
235 ]),
236 ('hg',
229 237 'hg_diff_mod_file_and_rename.diff',
230 238 [('README.rst', 'M',
231 239 {'added': 3,
232 240 'deleted': 0,
233 241 'binary': False,
234 242 'ops': {MOD_FILENODE: 'modified file',
235 243 RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
236 244 ]),
237 245 ('hg',
238 246 'hg_diff_del_single_binary_file.diff',
239 247 [('US Warszawa.jpg', 'D',
240 248 {'added': 0,
241 249 'deleted': 0,
242 250 'binary': True,
243 251 'ops': {DEL_FILENODE: 'deleted file',
244 252 BIN_FILENODE: 'binary diff hidden'}}),
245 253 ]),
246 254 ('hg',
247 255 'hg_diff_chmod_and_mod_single_binary_file.diff',
248 256 [('gravatar.png', 'M',
249 257 {'added': 0,
250 258 'deleted': 0,
251 259 'binary': True,
252 260 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
253 261 BIN_FILENODE: 'binary diff hidden'}}),
254 262 ]),
255 263 ('hg',
256 264 'hg_diff_chmod.diff',
257 265 [('file', 'M',
258 266 {'added': 0,
259 267 'deleted': 0,
260 268 'binary': True,
261 269 'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
262 270 ]),
263 271 ('hg',
264 272 'hg_diff_rename_file.diff',
265 273 [('file_renamed', 'M',
266 274 {'added': 0,
267 275 'deleted': 0,
268 276 'binary': True,
269 277 'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
270 278 ]),
271 279 ('hg',
272 280 'hg_diff_rename_and_chmod_file.diff',
273 281 [('README', 'M',
274 282 {'added': 0,
275 283 'deleted': 0,
276 284 'binary': True,
277 285 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
278 286 RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
279 287 ]),
280 288 ('hg',
281 289 'hg_diff_binary_and_normal.diff',
282 290 [('img/baseline-10px.png', 'A',
283 291 {'added': 0,
284 292 'deleted': 0,
285 293 'binary': True,
286 294 'ops': {NEW_FILENODE: 'new file 100644',
287 295 BIN_FILENODE: 'binary diff hidden'}}),
288 296 ('js/jquery/hashgrid.js', 'A',
289 297 {'added': 340,
290 298 'deleted': 0,
291 299 'binary': False,
292 300 'ops': {NEW_FILENODE: 'new file 100755'}}),
293 301 ('index.html', 'M',
294 302 {'added': 3,
295 303 'deleted': 2,
296 304 'binary': False,
297 305 'ops': {MOD_FILENODE: 'modified file'}}),
298 306 ('less/docs.less', 'M',
299 307 {'added': 34,
300 308 'deleted': 0,
301 309 'binary': False,
302 310 'ops': {MOD_FILENODE: 'modified file'}}),
303 311 ('less/scaffolding.less', 'M',
304 312 {'added': 1,
305 313 'deleted': 3,
306 314 'binary': False,
307 315 'ops': {MOD_FILENODE: 'modified file'}}),
308 316 ('readme.markdown', 'M',
309 317 {'added': 1,
310 318 'deleted': 10,
311 319 'binary': False,
312 320 'ops': {MOD_FILENODE: 'modified file'}}),
313 321 ('img/baseline-20px.png', 'D',
314 322 {'added': 0,
315 323 'deleted': 0,
316 324 'binary': True,
317 325 'ops': {DEL_FILENODE: 'deleted file',
318 326 BIN_FILENODE: 'binary diff hidden'}}),
319 327 ('js/global.js', 'D',
320 328 {'added': 0,
321 329 'deleted': 75,
322 330 'binary': False,
323 331 'ops': {DEL_FILENODE: 'deleted file'}})
324 332 ]),
325 333 ('git',
326 334 'git_diff_chmod.diff',
327 335 [('work-horus.xls', 'M',
328 336 {'added': 0,
329 337 'deleted': 0,
330 338 'binary': True,
331 339 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
332 340 ]),
333 341 ('git',
334 342 'git_diff_rename_file.diff',
335 343 [('file.xls', 'M',
336 344 {'added': 0,
337 345 'deleted': 0,
338 346 'binary': True,
339 347 'ops': {
340 348 RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
341 349 ]),
342 350 ('git',
343 351 'git_diff_mod_single_binary_file.diff',
344 352 [('US Warszawa.jpg', 'M',
345 353 {'added': 0,
346 354 'deleted': 0,
347 355 'binary': True,
348 356 'ops': {MOD_FILENODE: 'modified file',
349 357 BIN_FILENODE: 'binary diff hidden'}})
350 358 ]),
351 359 ('git',
352 360 'git_diff_binary_and_normal.diff',
353 361 [('img/baseline-10px.png', 'A',
354 362 {'added': 0,
355 363 'deleted': 0,
356 364 'binary': True,
357 365 'ops': {NEW_FILENODE: 'new file 100644',
358 366 BIN_FILENODE: 'binary diff hidden'}}),
359 367 ('js/jquery/hashgrid.js', 'A',
360 368 {'added': 340,
361 369 'deleted': 0,
362 370 'binary': False,
363 371 'ops': {NEW_FILENODE: 'new file 100755'}}),
364 372 ('index.html', 'M',
365 373 {'added': 3,
366 374 'deleted': 2,
367 375 'binary': False,
368 376 'ops': {MOD_FILENODE: 'modified file'}}),
369 377 ('less/docs.less', 'M',
370 378 {'added': 34,
371 379 'deleted': 0,
372 380 'binary': False,
373 381 'ops': {MOD_FILENODE: 'modified file'}}),
374 382 ('less/scaffolding.less', 'M',
375 383 {'added': 1,
376 384 'deleted': 3,
377 385 'binary': False,
378 386 'ops': {MOD_FILENODE: 'modified file'}}),
379 387 ('readme.markdown', 'M',
380 388 {'added': 1,
381 389 'deleted': 10,
382 390 'binary': False,
383 391 'ops': {MOD_FILENODE: 'modified file'}}),
384 392 ('img/baseline-20px.png', 'D',
385 393 {'added': 0,
386 394 'deleted': 0,
387 395 'binary': True,
388 396 'ops': {DEL_FILENODE: 'deleted file',
389 397 BIN_FILENODE: 'binary diff hidden'}}),
390 398 ('js/global.js', 'D',
391 399 {'added': 0,
392 400 'deleted': 75,
393 401 'binary': False,
394 402 'ops': {DEL_FILENODE: 'deleted file'}}),
395 403 ]),
396 404 ('hg',
397 405 'diff_with_diff_data.diff',
398 406 [('vcs/backends/base.py', 'M',
399 407 {'added': 18,
400 408 'deleted': 2,
401 409 'binary': False,
402 410 'ops': {MOD_FILENODE: 'modified file'}}),
403 411 ('vcs/backends/git/repository.py', 'M',
404 412 {'added': 46,
405 413 'deleted': 15,
406 414 'binary': False,
407 415 'ops': {MOD_FILENODE: 'modified file'}}),
408 416 ('vcs/backends/hg.py', 'M',
409 417 {'added': 22,
410 418 'deleted': 3,
411 419 'binary': False,
412 420 'ops': {MOD_FILENODE: 'modified file'}}),
413 421 ('vcs/tests/test_git.py', 'M',
414 422 {'added': 5,
415 423 'deleted': 5,
416 424 'binary': False,
417 425 'ops': {MOD_FILENODE: 'modified file'}}),
418 426 ('vcs/tests/test_repository.py', 'M',
419 427 {'added': 174,
420 428 'deleted': 2,
421 429 'binary': False,
422 430 'ops': {MOD_FILENODE: 'modified file'}}),
423 431 ]),
424 432 ('hg',
425 433 'hg_diff_copy_file.diff',
426 434 [('file2', 'M',
427 435 {'added': 0,
428 436 'deleted': 0,
429 437 'binary': True,
430 438 'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
431 439 ]),
432 440 ('hg',
433 441 'hg_diff_copy_and_modify_file.diff',
434 442 [('file3', 'M',
435 443 {'added': 1,
436 444 'deleted': 0,
437 445 'binary': False,
438 446 'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
439 447 MOD_FILENODE: 'modified file'}}),
440 448 ]),
441 449 ('hg',
442 450 'hg_diff_copy_and_chmod_file.diff',
443 451 [('file4', 'M',
444 452 {'added': 0,
445 453 'deleted': 0,
446 454 'binary': True,
447 455 'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
448 456 CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
449 457 ]),
450 458 ('hg',
451 459 'hg_diff_copy_chmod_and_edit_file.diff',
452 460 [('file5', 'M',
453 461 {'added': 2,
454 462 'deleted': 1,
455 463 'binary': False,
456 464 'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
457 465 CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
458 466 MOD_FILENODE: 'modified file'}})]),
459 467
460 468 # Diffs to validate rename and copy file with space in its name
461 469 ('git',
462 470 'git_diff_rename_file_with_spaces.diff',
463 471 [('file_with_ two spaces.txt', 'M',
464 472 {'added': 0,
465 473 'deleted': 0,
466 474 'binary': True,
467 475 'ops': {
468 476 RENAMED_FILENODE: (
469 477 'file renamed from file_with_ spaces.txt to file_with_ '
470 478 ' two spaces.txt')}
471 479 }), ]),
472 480 ('hg',
473 481 'hg_diff_rename_file_with_spaces.diff',
474 482 [('file_changed _.txt', 'M',
475 483 {'added': 0,
476 484 'deleted': 0,
477 485 'binary': True,
478 486 'ops': {
479 487 RENAMED_FILENODE: (
480 488 'file renamed from file_ with update.txt to file_changed'
481 489 ' _.txt')}
482 490 }), ]),
483 491 ('hg',
484 492 'hg_diff_copy_file_with_spaces.diff',
485 493 [('file_copied_ with spaces.txt', 'M',
486 494 {'added': 0,
487 495 'deleted': 0,
488 496 'binary': True,
489 497 'ops': {
490 498 COPIED_FILENODE: (
491 499 'file copied from file_changed_without_spaces.txt to'
492 500 ' file_copied_ with spaces.txt')}
493 501 }),
494 502 ]),
495 503
496 504 # special signs from git
497 505 ('git',
498 506 'git_diff_binary_special_files.diff',
499 507 [('css/_Icon\\r', 'A',
500 508 {'added': 0,
501 509 'deleted': 0,
502 510 'binary': True,
503 511 'ops': {NEW_FILENODE: 'new file 100644',
504 512 BIN_FILENODE: 'binary diff hidden'}
505 513 }),
506 514 ]),
507 515 ('git',
508 516 'git_diff_binary_special_files_2.diff',
509 517 [('css/Icon\\r', 'A',
510 518 {'added': 0,
511 519 'deleted': 0,
512 520 'binary': True,
513 521 'ops': {NEW_FILENODE: 'new file 100644', }
514 522 }),
515 523 ]),
516 524
517 525 ('svn',
518 526 'svn_diff_binary_add_file.diff',
519 527 [('intl.dll', 'A',
520 528 {'added': 0,
521 529 'deleted': 0,
522 530 'binary': False,
523 531 'ops': {NEW_FILENODE: 'new file 10644',
524 532 #TODO(Marcink): depends on binary detection on svn patches
525 533 # BIN_FILENODE: 'binary diff hidden'
526 534 }
527 535 }),
528 536 ]),
529 537
530 538 ('svn',
531 539 'svn_diff_multiple_changes.diff',
532 540 [('trunk/doc/images/SettingsOverlay.png', 'M',
533 541 {'added': 0,
534 542 'deleted': 0,
535 543 'binary': False,
536 544 'ops': {MOD_FILENODE: 'modified file',
537 545 #TODO(Marcink): depends on binary detection on svn patches
538 546 # BIN_FILENODE: 'binary diff hidden'
539 547 }
540 548 }),
541 549 ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
542 550 {'added': 89,
543 551 'deleted': 34,
544 552 'binary': False,
545 553 'ops': {MOD_FILENODE: 'modified file'}
546 554 }),
547 555 ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
548 556 {'added': 66,
549 557 'deleted': 21,
550 558 'binary': False,
551 559 'ops': {MOD_FILENODE: 'modified file'}
552 560 }),
553 561 ('trunk/src/Changelog.txt', 'M',
554 562 {'added': 2,
555 563 'deleted': 0,
556 564 'binary': False,
557 565 'ops': {MOD_FILENODE: 'modified file'}
558 566 }),
559 567 ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
560 568 {'added': 19,
561 569 'deleted': 13,
562 570 'binary': False,
563 571 'ops': {MOD_FILENODE: 'modified file'}
564 572 }),
565 573 ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
566 574 {'added': 16,
567 575 'deleted': 1,
568 576 'binary': False,
569 577 'ops': {MOD_FILENODE: 'modified file'}
570 578 }),
571 579 ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
572 580 {'added': 3,
573 581 'deleted': 0,
574 582 'binary': False,
575 583 'ops': {MOD_FILENODE: 'modified file'}
576 584 }),
577 585 ('trunk/src/TortoiseProc/resource.h', 'M',
578 586 {'added': 2,
579 587 'deleted': 0,
580 588 'binary': False,
581 589 'ops': {MOD_FILENODE: 'modified file'}
582 590 }),
583 591 ('trunk/src/TortoiseShell/ShellCache.h', 'M',
584 592 {'added': 50,
585 593 'deleted': 1,
586 594 'binary': False,
587 595 'ops': {MOD_FILENODE: 'modified file'}
588 596 }),
589 597 ]),
590 598
591 599
592 600 # TODO: mikhail: do we still need this?
593 601 # (
594 602 # 'hg',
595 603 # 'large_diff.diff',
596 604 # [
597 605 # ('.hgignore', 'A', {
598 606 # 'deleted': 0, 'binary': False, 'added': 3, 'ops': {
599 607 # 1: 'new file 100644'}}),
600 608 # (
601 609 # 'MANIFEST.in', 'A',
602 610 # {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
603 611 # 1: 'new file 100644'}}),
604 612 # (
605 613 # 'README.txt', 'A',
606 614 # {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
607 615 # 1: 'new file 100644'}}),
608 616 # (
609 617 # 'development.ini', 'A', {
610 618 # 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
611 619 # 1: 'new file 100644'}}),
612 620 # (
613 621 # 'docs/index.txt', 'A', {
614 622 # 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
615 623 # 1: 'new file 100644'}}),
616 624 # (
617 625 # 'ez_setup.py', 'A', {
618 626 # 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
619 627 # 1: 'new file 100644'}}),
620 628 # (
621 629 # 'hgapp.py', 'A', {
622 630 # 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
623 631 # 1: 'new file 100644'}}),
624 632 # (
625 633 # 'hgwebdir.config', 'A', {
626 634 # 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
627 635 # 1: 'new file 100644'}}),
628 636 # (
629 637 # 'pylons_app.egg-info/PKG-INFO', 'A', {
630 638 # 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
631 639 # 1: 'new file 100644'}}),
632 640 # (
633 641 # 'pylons_app.egg-info/SOURCES.txt', 'A', {
634 642 # 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
635 643 # 1: 'new file 100644'}}),
636 644 # (
637 645 # 'pylons_app.egg-info/dependency_links.txt', 'A', {
638 646 # 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
639 647 # 1: 'new file 100644'}}),
640 648 # ]
641 649 # ),
642 650 ]
643 651
644 652 DIFF_FIXTURES_WITH_CONTENT = [
645 653 (
646 654 'hg', 'hg_diff_single_file_change_newline.diff',
647 655 [
648 656 (
649 657 'file_b', # filename
650 658 'A', # change
651 659 { # stats
652 660 'added': 1,
653 661 'deleted': 0,
654 662 'binary': False,
655 663 'ops': {NEW_FILENODE: 'new file 100644', }
656 664 },
657 665 '@@ -0,0 +1 @@\n+test_content b\n' # diff
658 666 ),
659 667 ],
660 668 ),
661 669 (
662 670 'hg', 'hg_diff_double_file_change_newline.diff',
663 671 [
664 672 (
665 673 'file_b', # filename
666 674 'A', # change
667 675 { # stats
668 676 'added': 1,
669 677 'deleted': 0,
670 678 'binary': False,
671 679 'ops': {NEW_FILENODE: 'new file 100644', }
672 680 },
673 681 '@@ -0,0 +1 @@\n+test_content b\n' # diff
674 682 ),
675 683 (
676 684 'file_c', # filename
677 685 'A', # change
678 686 { # stats
679 687 'added': 1,
680 688 'deleted': 0,
681 689 'binary': False,
682 690 'ops': {NEW_FILENODE: 'new file 100644', }
683 691 },
684 692 '@@ -0,0 +1 @@\n+test_content c\n' # diff
685 693 ),
686 694 ],
687 695 ),
688 696 (
689 697 'hg', 'hg_diff_double_file_change_double_newline.diff',
690 698 [
691 699 (
692 700 'file_b', # filename
693 701 'A', # change
694 702 { # stats
695 703 'added': 1,
696 704 'deleted': 0,
697 705 'binary': False,
698 706 'ops': {NEW_FILENODE: 'new file 100644', }
699 707 },
700 708 '@@ -0,0 +1 @@\n+test_content b\n\n' # diff
701 709 ),
702 710 (
703 711 'file_c', # filename
704 712 'A', # change
705 713 { # stats
706 714 'added': 1,
707 715 'deleted': 0,
708 716 'binary': False,
709 717 'ops': {NEW_FILENODE: 'new file 100644', }
710 718 },
711 719 '@@ -0,0 +1 @@\n+test_content c\n' # diff
712 720 ),
713 721 ],
714 722 ),
715 723 (
716 724 'hg', 'hg_diff_four_file_change_newline.diff',
717 725 [
718 726 (
719 727 'file', # filename
720 728 'A', # change
721 729 { # stats
722 730 'added': 1,
723 731 'deleted': 0,
724 732 'binary': False,
725 733 'ops': {NEW_FILENODE: 'new file 100644', }
726 734 },
727 735 '@@ -0,0 +1,1 @@\n+file\n' # diff
728 736 ),
729 737 (
730 738 'file2', # filename
731 739 'A', # change
732 740 { # stats
733 741 'added': 1,
734 742 'deleted': 0,
735 743 'binary': False,
736 744 'ops': {NEW_FILENODE: 'new file 100644', }
737 745 },
738 746 '@@ -0,0 +1,1 @@\n+another line\n' # diff
739 747 ),
740 748 (
741 749 'file3', # filename
742 750 'A', # change
743 751 { # stats
744 752 'added': 1,
745 753 'deleted': 0,
746 754 'binary': False,
747 755 'ops': {NEW_FILENODE: 'new file 100644', }
748 756 },
749 757 '@@ -0,0 +1,1 @@\n+newline\n' # diff
750 758 ),
751 759 (
752 760 'file4', # filename
753 761 'A', # change
754 762 { # stats
755 763 'added': 1,
756 764 'deleted': 0,
757 765 'binary': False,
758 766 'ops': {NEW_FILENODE: 'new file 100644', }
759 767 },
760 768 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
761 769 ),
762 770 ],
763 771 ),
764 772
765 773 ]
766 774
767 775
768 776 diff_class = {
769 777 'git': GitDiff,
770 778 'hg': MercurialDiff,
771 779 'svn': SubversionDiff,
772 780 }
773 781
774 782
775 783 @pytest.fixture(params=DIFF_FIXTURES)
776 784 def diff_fixture(request):
777 785 vcs, diff_fixture, expected = request.param
778 786 diff_txt = fixture.load_resource(diff_fixture)
779 787 diff = diff_class[vcs](diff_txt)
780 788 return diff, expected
781 789
782 790
783 791 def test_diff_lib(diff_fixture):
784 792 diff, expected_data = diff_fixture
785 793 diff_proc = DiffProcessor(diff)
786 794 diff_proc_d = diff_proc.prepare()
787 795 data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
788 796 assert expected_data == data
789 797
790 798
791 799 @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
792 800 def diff_fixture_w_content(request):
793 801 vcs, diff_fixture, expected = request.param
794 802 diff_txt = fixture.load_resource(diff_fixture)
795 803 diff = diff_class[vcs](diff_txt)
796 804 return diff, expected
797 805
798 806
799 807 def test_diff_lib_newlines(diff_fixture_w_content):
800 808 diff, expected_data = diff_fixture_w_content
801 809 diff_proc = DiffProcessor(diff)
802 810 diff_proc_d = diff_proc.prepare()
803 811 data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
804 812 for x in diff_proc_d]
805 813 assert expected_data == data
General Comments 0
You need to be logged in to leave comments. Login now