ux: make 'no newline at end of file' message more pronounced in diffs
dan
r1032:ab6082d0 default
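
This change gives the '\ No newline at end of file' marker its own pair of
diff actions ('old-no-nl' / 'new-no-nl', replacing the generic
'context-old' / 'context-new') and renders the marker text with a dedicated
'nonl' token class instead of passing it through the syntax highlighter.

As a rough illustration (not part of the changeset), the marker in question
is the backslash line that unified diffs emit when a file lacks a trailing
newline; DiffProcessor detects it with the same regex used in the second
file below:

import re

newline_marker = re.compile(r'^\\ No newline at end of file')

sample = [
    '-last old line',
    '\\ No newline at end of file',
    '+last new line',
]
for line in sample:
    if newline_marker.match(line):
        print('no-newline marker:', line)  # becomes an old-no-nl/new-no-nl line
    else:
        print('diff content:', line)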
@@ -1,635 +1,641 @@
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2016 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import logging
22 22 import difflib
23 23 from itertools import groupby
24 24
25 25 from pygments import lex
26 26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 27 from rhodecode.lib.helpers import (
28 28 get_lexer_for_filenode, get_lexer_safe, html_escape)
29 29 from rhodecode.lib.utils2 import AttributeDict
30 30 from rhodecode.lib.vcs.nodes import FileNode
31 31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 32 from rhodecode.lib.diffs import LimitedDiffContainer
33 33 from pygments.lexers import get_lexer_by_name
34 34
35 35 plain_text_lexer = get_lexer_by_name(
36 36 'text', stripall=False, stripnl=False, ensurenl=False)
37 37
38 38
39 39 log = logging.getLogger()
40 40
41 41
42 42 def filenode_as_lines_tokens(filenode, lexer=None):
43 43 lexer = lexer or get_lexer_for_filenode(filenode)
44 44 log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
45 45 tokens = tokenize_string(filenode.content, lexer)
46 46 lines = split_token_stream(tokens, split_string='\n')
47 47 rv = list(lines)
48 48 return rv
49 49
50 50
51 51 def tokenize_string(content, lexer):
52 52 """
53 53 Use pygments to tokenize some content based on a lexer
54 54 ensuring all original newlines and whitespace are preserved
55 55 """
56 56
57 57 lexer.stripall = False
58 58 lexer.stripnl = False
59 59 lexer.ensurenl = False
60 60 for token_type, token_text in lex(content, lexer):
61 61 yield pygment_token_class(token_type), token_text
62 62
63 63
64 64 def split_token_stream(tokens, split_string=u'\n'):
65 65 """
66 66 Take a stream of (TokenType, text) tuples and split it by a string,
67 67 yielding one list of tuples per line
68 68 >>> list(split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')]))
69 69 [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')],
70 70 [(TEXT, '')]]
71 71 """
72 72
73 73 buffer = []
74 74 for token_class, token_text in tokens:
75 75 parts = token_text.split(split_string)
76 76 for part in parts[:-1]:
77 77 buffer.append((token_class, part))
78 78 yield buffer
79 79 buffer = []
80 80
81 81 buffer.append((token_class, parts[-1]))
82 82
83 83 if buffer:
84 84 yield buffer
85 85
86 86
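For reference, a quick sketch (not part of the changeset, relying on
split_token_stream defined above) of what the generator yields: one list of
(token_class, text) tuples per line, with the split string removed.

tokens = [('text', 'some\ntext'), ('text', 'more\n')]
lines = list(split_token_stream(tokens))
# [[('text', 'some')],
#  [('text', 'text'), ('text', 'more')],
#  [('text', '')]]
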
87 87 def filenode_as_annotated_lines_tokens(filenode):
88 88 """
89 89 Take a file node and return a list of (annotation, lines) pairs; if no
90 90 annotation is found for a line, the annotation is None.
91 91
92 92 eg:
93 93
94 94 [
95 95 (annotation1, [
96 96 (1, line1_tokens_list),
97 97 (2, line2_tokens_list),
98 98 ]),
99 99 (annotation2, [
100 100 (3, line1_tokens_list),
101 101 ]),
102 102 (None, [
103 103 (4, line1_tokens_list),
104 104 ]),
105 105 (annotation1, [
106 106 (5, line1_tokens_list),
107 107 (6, line2_tokens_list),
108 108 ])
109 109 ]
110 110 """
111 111
112 112 commit_cache = {} # cache commit_getter lookups
113 113
114 114 def _get_annotation(commit_id, commit_getter):
115 115 if commit_id not in commit_cache:
116 116 commit_cache[commit_id] = commit_getter()
117 117 return commit_cache[commit_id]
118 118
119 119 annotation_lookup = {
120 120 line_no: _get_annotation(commit_id, commit_getter)
121 121 for line_no, commit_id, commit_getter, line_content
122 122 in filenode.annotate
123 123 }
124 124
125 125 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
126 126 for line_no, tokens
127 127 in enumerate(filenode_as_lines_tokens(filenode), 1))
128 128
129 129 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
130 130
131 131 for annotation, group in grouped_annotations_lines:
132 132 yield (
133 133 annotation, [(line_no, tokens)
134 134 for (_, line_no, tokens) in group]
135 135 )
136 136
137 137
138 138 def render_tokenstream(tokenstream):
139 139 result = []
140 140 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
141 141
142 142 if token_class:
143 143 result.append(u'<span class="%s">' % token_class)
144 144 else:
145 145 result.append(u'<span>')
146 146
147 147 for op_tag, token_text in token_ops_texts:
148 148
149 149 if op_tag:
150 150 result.append(u'<%s>' % op_tag)
151 151
152 152 escaped_text = html_escape(token_text)
153 153
154 154 # TODO: dan: investigate showing hidden characters like space/nl/tab
155 155 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
156 156 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
157 157 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
158 158
159 159 result.append(escaped_text)
160 160
161 161 if op_tag:
162 162 result.append(u'</%s>' % op_tag)
163 163
164 164 result.append(u'</span>')
165 165
166 166 html = ''.join(result)
167 167 return html
168 168
169 169
170 170 def rollup_tokenstream(tokenstream):
171 171 """
172 172 Group a token stream of the format:
173 173
174 174 ('class', 'op', 'text')
175 175 or
176 176 ('class', 'text')
177 177
178 178 into
179 179
180 180 [('class1',
181 181 [('op1', 'text'),
182 182 ('op2', 'text')]),
183 183 ('class2',
184 184 [('op3', 'text')])]
185 185
186 186 This is used to emit the minimal set of tags necessary when
187 187 rendering to html, e.g. for a token stream it produces
188 188
189 189 <span class="A"><ins>he</ins>llo</span>
190 190 vs
191 191 <span class="A"><ins>he</ins></span><span class="A">llo</span>
192 192
193 193 If a 2 tuple is passed in, the output op will be an empty string.
194 194
195 195 eg:
196 196
197 197 >>> rollup_tokenstream([('classA', '', 'h'),
198 198 ('classA', 'del', 'ell'),
199 199 ('classA', '', 'o'),
200 200 ('classB', '', ' '),
201 201 ('classA', '', 'the'),
202 202 ('classA', '', 're'),
203 203 ])
204 204
205 205 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
206 206 ('classB', [('', ' ')]),
207 207 ('classA', [('', 'there')])]
208 208
209 209 """
210 210 if tokenstream and len(tokenstream[0]) == 2:
211 211 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
212 212
213 213 result = []
214 214 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
215 215 ops = []
216 216 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
217 217 text_buffer = []
218 218 for t_class, t_op, t_text in token_text_list:
219 219 text_buffer.append(t_text)
220 220 ops.append((token_op, ''.join(text_buffer)))
221 221 result.append((token_class, ops))
222 222 return result
223 223
224 224
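A short usage sketch (hypothetical input, relying on rollup_tokenstream
above): tuples are grouped by token class, and consecutive tuples with the
same op are joined, so the renderer can emit one <span> per class run.

stream = [('classA', '', 'h'), ('classA', 'del', 'ell'), ('classA', '', 'o')]
rolled = rollup_tokenstream(stream)
# [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')])]
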
225 225 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
226 226 """
227 227 Converts a list of (token_class, token_text) tuples to a list of
228 228 (token_class, token_op, token_text) tuples where token_op is one of
229 229 ('ins', 'del', '')
230 230
231 231 :param old_tokens: list of (token_class, token_text) tuples of old line
232 232 :param new_tokens: list of (token_class, token_text) tuples of new line
233 233 :param use_diff_match_patch: boolean; use Google's diff-match-patch
234 234 library, which has options to 'smooth' out the character-by-character
235 235 differences into nicer ins/del blocks
236 236 """
237 237
238 238 old_tokens_result = []
239 239 new_tokens_result = []
240 240
241 241 similarity = difflib.SequenceMatcher(None,
242 242 ''.join(token_text for token_class, token_text in old_tokens),
243 243 ''.join(token_text for token_class, token_text in new_tokens)
244 244 ).ratio()
245 245
246 246 if similarity < 0.6: # return, the blocks are too different
247 247 for token_class, token_text in old_tokens:
248 248 old_tokens_result.append((token_class, '', token_text))
249 249 for token_class, token_text in new_tokens:
250 250 new_tokens_result.append((token_class, '', token_text))
251 251 return old_tokens_result, new_tokens_result, similarity
252 252
253 253 token_sequence_matcher = difflib.SequenceMatcher(None,
254 254 [x[1] for x in old_tokens],
255 255 [x[1] for x in new_tokens])
256 256
257 257 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
258 258 # check the differences by token block types first to give a
259 259 # nicer "block" level replacement vs character diffs
260 260
261 261 if tag == 'equal':
262 262 for token_class, token_text in old_tokens[o1:o2]:
263 263 old_tokens_result.append((token_class, '', token_text))
264 264 for token_class, token_text in new_tokens[n1:n2]:
265 265 new_tokens_result.append((token_class, '', token_text))
266 266 elif tag == 'delete':
267 267 for token_class, token_text in old_tokens[o1:o2]:
268 268 old_tokens_result.append((token_class, 'del', token_text))
269 269 elif tag == 'insert':
270 270 for token_class, token_text in new_tokens[n1:n2]:
271 271 new_tokens_result.append((token_class, 'ins', token_text))
272 272 elif tag == 'replace':
273 273 # if same type token blocks must be replaced, do a diff on the
274 274 # characters in the token blocks to show individual changes
275 275
276 276 old_char_tokens = []
277 277 new_char_tokens = []
278 278 for token_class, token_text in old_tokens[o1:o2]:
279 279 for char in token_text:
280 280 old_char_tokens.append((token_class, char))
281 281
282 282 for token_class, token_text in new_tokens[n1:n2]:
283 283 for char in token_text:
284 284 new_char_tokens.append((token_class, char))
285 285
286 286 old_string = ''.join([token_text for
287 287 token_class, token_text in old_char_tokens])
288 288 new_string = ''.join([token_text for
289 289 token_class, token_text in new_char_tokens])
290 290
291 291 char_sequence = difflib.SequenceMatcher(
292 292 None, old_string, new_string)
293 293 copcodes = char_sequence.get_opcodes()
294 294 obuffer, nbuffer = [], []
295 295
296 296 if use_diff_match_patch:
297 297 dmp = diff_match_patch()
298 298 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
299 299 reps = dmp.diff_main(old_string, new_string)
300 300 dmp.diff_cleanupEfficiency(reps)
301 301
302 302 a, b = 0, 0
303 303 for op, rep in reps:
304 304 l = len(rep)
305 305 if op == 0:
306 306 for i, c in enumerate(rep):
307 307 obuffer.append((old_char_tokens[a+i][0], '', c))
308 308 nbuffer.append((new_char_tokens[b+i][0], '', c))
309 309 a += l
310 310 b += l
311 311 elif op == -1:
312 312 for i, c in enumerate(rep):
313 313 obuffer.append((old_char_tokens[a+i][0], 'del', c))
314 314 a += l
315 315 elif op == 1:
316 316 for i, c in enumerate(rep):
317 317 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
318 318 b += l
319 319 else:
320 320 for ctag, co1, co2, cn1, cn2 in copcodes:
321 321 if ctag == 'equal':
322 322 for token_class, token_text in old_char_tokens[co1:co2]:
323 323 obuffer.append((token_class, '', token_text))
324 324 for token_class, token_text in new_char_tokens[cn1:cn2]:
325 325 nbuffer.append((token_class, '', token_text))
326 326 elif ctag == 'delete':
327 327 for token_class, token_text in old_char_tokens[co1:co2]:
328 328 obuffer.append((token_class, 'del', token_text))
329 329 elif ctag == 'insert':
330 330 for token_class, token_text in new_char_tokens[cn1:cn2]:
331 331 nbuffer.append((token_class, 'ins', token_text))
332 332 elif ctag == 'replace':
333 333 for token_class, token_text in old_char_tokens[co1:co2]:
334 334 obuffer.append((token_class, 'del', token_text))
335 335 for token_class, token_text in new_char_tokens[cn1:cn2]:
336 336 nbuffer.append((token_class, 'ins', token_text))
337 337
338 338 old_tokens_result.extend(obuffer)
339 339 new_tokens_result.extend(nbuffer)
340 340
341 341 return old_tokens_result, new_tokens_result, similarity
342 342
343 343
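Usage sketch (hypothetical single-line inputs, relying on tokens_diff
above): the returned streams carry per-character 'ins'/'del' ops where the
lines differ, plus the similarity ratio used for the 0.6 cut-off.

old = [('text', 'hello')]
new = [('text', 'hallo')]
old_out, new_out, similarity = tokens_diff(old, new)
# similarity == 0.8; old_out marks the changed character with 'del' and
# new_out with 'ins', e.g. ('text', 'del', 'e') and ('text', 'ins', 'a')
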
344 344 class DiffSet(object):
345 345 """
346 346 An object for parsing the diff result from diffs.DiffProcessor and
347 347 adding highlighting, side by side/unified renderings and line diffs
348 348 """
349 349
350 350 HL_REAL = 'REAL' # highlights using original file, slow
351 351 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
352 352 # in the case of multiline code
353 353 HL_NONE = 'NONE' # no highlighting, fastest
354 354
355 355 def __init__(self, highlight_mode=HL_REAL,
356 356 source_node_getter=lambda filename: None,
357 357 target_node_getter=lambda filename: None,
358 358 source_nodes=None, target_nodes=None,
359 359 max_file_size_limit=150 * 1024, # files over this size will
360 360 # use fast highlighting
361 361 ):
362 362
363 363 self.highlight_mode = highlight_mode
364 364 self.highlighted_filenodes = {}
365 365 self.source_node_getter = source_node_getter
366 366 self.target_node_getter = target_node_getter
367 367 self.source_nodes = source_nodes or {}
368 368 self.target_nodes = target_nodes or {}
369 369
370 370
371 371 self.max_file_size_limit = max_file_size_limit
372 372
373 373 def render_patchset(self, patchset, source_ref=None, target_ref=None):
374 374 diffset = AttributeDict(dict(
375 375 lines_added=0,
376 376 lines_deleted=0,
377 377 changed_files=0,
378 378 files=[],
379 379 limited_diff=isinstance(patchset, LimitedDiffContainer),
380 380 source_ref=source_ref,
381 381 target_ref=target_ref,
382 382 ))
383 383 for patch in patchset:
384 384 filediff = self.render_patch(patch)
385 385 filediff.diffset = diffset
386 386 diffset.files.append(filediff)
387 387 diffset.changed_files += 1
388 388 if not patch['stats']['binary']:
389 389 diffset.lines_added += patch['stats']['added']
390 390 diffset.lines_deleted += patch['stats']['deleted']
391 391
392 392 return diffset
393 393
394 394 _lexer_cache = {}
395 395 def _get_lexer_for_filename(self, filename):
396 396 # cached because we might need to call it twice for source/target
397 397 if filename not in self._lexer_cache:
398 398 self._lexer_cache[filename] = get_lexer_safe(filepath=filename)
399 399 return self._lexer_cache[filename]
400 400
401 401 def render_patch(self, patch):
402 402 log.debug('rendering diff for %r' % patch['filename'])
403 403
404 404 source_filename = patch['original_filename']
405 405 target_filename = patch['filename']
406 406
407 407 source_lexer = plain_text_lexer
408 408 target_lexer = plain_text_lexer
409 409
410 410 if not patch['stats']['binary']:
411 411 if self.highlight_mode == self.HL_REAL:
412 412 if (source_filename and patch['operation'] in ('D', 'M')
413 413 and source_filename not in self.source_nodes):
414 414 self.source_nodes[source_filename] = (
415 415 self.source_node_getter(source_filename))
416 416
417 417 if (target_filename and patch['operation'] in ('A', 'M')
418 418 and target_filename not in self.target_nodes):
419 419 self.target_nodes[target_filename] = (
420 420 self.target_node_getter(target_filename))
421 421
422 422 elif self.highlight_mode == self.HL_FAST:
423 423 source_lexer = self._get_lexer_for_filename(source_filename)
424 424 target_lexer = self._get_lexer_for_filename(target_filename)
425 425
426 426 source_file = self.source_nodes.get(source_filename, source_filename)
427 427 target_file = self.target_nodes.get(target_filename, target_filename)
428 428
429 429 source_filenode, target_filenode = None, None
430 430
431 431 # TODO: dan: FileNode.lexer works on the content of the file - which
432 432 # can be slow - issue #4289 explains a lexer clean up - which once
433 433 # done can allow caching a lexer for a filenode to avoid the file lookup
434 434 if isinstance(source_file, FileNode):
435 435 source_filenode = source_file
436 436 source_lexer = source_file.lexer
437 437 if isinstance(target_file, FileNode):
438 438 target_filenode = target_file
439 439 target_lexer = target_file.lexer
440 440
441 441 source_file_path, target_file_path = None, None
442 442
443 443 if source_filename != '/dev/null':
444 444 source_file_path = source_filename
445 445 if target_filename != '/dev/null':
446 446 target_file_path = target_filename
447 447
448 448 source_file_type = source_lexer.name
449 449 target_file_type = target_lexer.name
450 450
451 451 op_hunks = patch['chunks'][0]
452 452 hunks = patch['chunks'][1:]
453 453
454 454 filediff = AttributeDict({
455 455 'source_file_path': source_file_path,
456 456 'target_file_path': target_file_path,
457 457 'source_filenode': source_filenode,
458 458 'target_filenode': target_filenode,
459 459 'hunks': [],
460 460 'source_file_type': source_file_type,
461 461 'target_file_type': target_file_type,
462 462 'patch': patch,
463 463 'source_mode': patch['stats']['old_mode'],
464 464 'target_mode': patch['stats']['new_mode'],
465 465 'limited_diff': isinstance(patch, LimitedDiffContainer),
466 466 'diffset': self,
467 467 })
468 468
469 469 for hunk in hunks:
470 470 hunkbit = self.parse_hunk(hunk, source_file, target_file)
471 471 hunkbit.filediff = filediff
472 472 filediff.hunks.append(hunkbit)
473 473 return filediff
474 474
475 475 def parse_hunk(self, hunk, source_file, target_file):
476 476 result = AttributeDict(dict(
477 477 source_start=hunk['source_start'],
478 478 source_length=hunk['source_length'],
479 479 target_start=hunk['target_start'],
480 480 target_length=hunk['target_length'],
481 481 section_header=hunk['section_header'],
482 482 lines=[],
483 483 ))
484 484 before, after = [], []
485 485
486 486 for line in hunk['lines']:
487 487 if line['action'] == 'unmod':
488 488 result.lines.extend(
489 489 self.parse_lines(before, after, source_file, target_file))
490 490 after.append(line)
491 491 before.append(line)
492 492 elif line['action'] == 'add':
493 493 after.append(line)
494 494 elif line['action'] == 'del':
495 495 before.append(line)
496 elif line['action'] == 'context-old':
496 elif line['action'] == 'old-no-nl':
497 497 before.append(line)
498 elif line['action'] == 'context-new':
498 elif line['action'] == 'new-no-nl':
499 499 after.append(line)
500 500
501 501 result.lines.extend(
502 502 self.parse_lines(before, after, source_file, target_file))
503 503 result.unified = self.as_unified(result.lines)
504 504 result.sideside = result.lines
505 505 return result
506 506
507 507 def parse_lines(self, before_lines, after_lines, source_file, target_file):
508 508 # TODO: dan: investigate doing the diff comparison and fast highlighting
509 509 # on the entire before and after buffered block lines rather than by
510 510 # line, this means we can get better 'fast' highlighting if the context
511 511 # allows it - eg.
512 512 # line 4: """
513 513 # line 5: this gets highlighted as a string
514 514 # line 6: """
515 515
516 516 lines = []
517 517 while before_lines or after_lines:
518 518 before, after = None, None
519 519 before_tokens, after_tokens = None, None
520 520
521 521 if before_lines:
522 522 before = before_lines.pop(0)
523 523 if after_lines:
524 524 after = after_lines.pop(0)
525 525
526 526 original = AttributeDict()
527 527 modified = AttributeDict()
528 528
529 529 if before:
530 if before['action'] == 'old-no-nl':
531 before_tokens = [('nonl', before['line'])]
532 else:
530 533 before_tokens = self.get_line_tokens(
531 534 line_text=before['line'], line_number=before['old_lineno'],
532 535 file=source_file)
533 536 original.lineno = before['old_lineno']
534 537 original.content = before['line']
535 538 original.action = self.action_to_op(before['action'])
536 539
537 540 if after:
541 if after['action'] == 'new-no-nl':
542 after_tokens = [('nonl', after['line'])]
543 else:
538 544 after_tokens = self.get_line_tokens(
539 545 line_text=after['line'], line_number=after['new_lineno'],
540 546 file=target_file)
541 547 modified.lineno = after['new_lineno']
542 548 modified.content = after['line']
543 549 modified.action = self.action_to_op(after['action'])
544 550
545
546 551 # diff the lines
547 552 if before_tokens and after_tokens:
548 o_tokens, m_tokens, similarity = tokens_diff(before_tokens, after_tokens)
553 o_tokens, m_tokens, similarity = tokens_diff(
554 before_tokens, after_tokens)
549 555 original.content = render_tokenstream(o_tokens)
550 556 modified.content = render_tokenstream(m_tokens)
551 557 elif before_tokens:
552 558 original.content = render_tokenstream(
553 559 [(x[0], '', x[1]) for x in before_tokens])
554 560 elif after_tokens:
555 561 modified.content = render_tokenstream(
556 562 [(x[0], '', x[1]) for x in after_tokens])
557 563
558 564 lines.append(AttributeDict({
559 565 'original': original,
560 566 'modified': modified,
561 567 }))
562 568
563 569 return lines
564 570
565 571 def get_line_tokens(self, line_text, line_number, file=None):
566 572 filenode = None
567 573 filename = None
568 574
569 575 if isinstance(file, basestring):
570 576 filename = file
571 577 elif isinstance(file, FileNode):
572 578 filenode = file
573 579 filename = file.unicode_path
574 580
575 581 if self.highlight_mode == self.HL_REAL and filenode:
576 582 if line_number and file.size < self.max_file_size_limit:
577 583 return self.get_tokenized_filenode_line(file, line_number)
578 584
579 585 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
580 586 lexer = self._get_lexer_for_filename(filename)
581 587 return list(tokenize_string(line_text, lexer))
582 588
583 589 return list(tokenize_string(line_text, plain_text_lexer))
584 590
585 591 def get_tokenized_filenode_line(self, filenode, line_number):
586 592
587 593 if filenode not in self.highlighted_filenodes:
588 594 tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer)
589 595 self.highlighted_filenodes[filenode] = tokenized_lines
590 596 return self.highlighted_filenodes[filenode][line_number - 1]
591 597
592 598 def action_to_op(self, action):
593 599 return {
594 600 'add': '+',
595 601 'del': '-',
596 602 'unmod': ' ',
597 'context-old': ' ',
598 'context-new': ' ',
603 'old-no-nl': ' ',
604 'new-no-nl': ' ',
599 605 }.get(action, action)
600 606
601 607 def as_unified(self, lines):
602 608 """ Return a generator that yields the lines of a diff in unified order """
603 609 def generator():
604 610 buf = []
605 611 for line in lines:
606 612
607 613 if buf and not line.original or line.original.action == ' ':
608 614 for b in buf:
609 615 yield b
610 616 buf = []
611 617
612 618 if line.original:
613 619 if line.original.action == ' ':
614 620 yield (line.original.lineno, line.modified.lineno,
615 621 line.original.action, line.original.content)
616 622 continue
617 623
618 624 if line.original.action == '-':
619 625 yield (line.original.lineno, None,
620 626 line.original.action, line.original.content)
621 627
622 628 if line.modified.action == '+':
623 629 buf.append((
624 630 None, line.modified.lineno,
625 631 line.modified.action, line.modified.content))
626 632 continue
627 633
628 634 if line.modified:
629 635 yield (None, line.modified.lineno,
630 636 line.modified.action, line.modified.content)
631 637
632 638 for b in buf:
633 639 yield b
634 640
635 641 return generator()
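
The net effect in parse_lines above, sketched with an assumed line dict of
the shape the parser passes around: a no-newline marker line now bypasses
the lexer and becomes a single 'nonl' token, so the rendered diff wraps the
message in its own <span class="nonl"> element (which the UI can style more
prominently) instead of highlighting it as source code.

before = {
    'action': 'old-no-nl',
    'line': ' No newline at end of file',
    'old_lineno': None,
}
if before['action'] == 'old-no-nl':
    before_tokens = [('nonl', before['line'])]
# render_tokenstream([(cls, '', text) for cls, text in before_tokens])
# -> u'<span class="nonl"> No newline at end of file</span>'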
@@ -1,1161 +1,1161 @@
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2016 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import collections
27 27 import re
28 28 import difflib
29 29 import logging
30 30
31 31 from itertools import tee, imap
32 32
33 33 from pylons.i18n.translation import _
34 34
35 35 from rhodecode.lib.vcs.exceptions import VCSError
36 36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 37 from rhodecode.lib.vcs.backends.base import EmptyCommit
38 38 from rhodecode.lib.helpers import escape
39 39 from rhodecode.lib.utils2 import safe_unicode
40 40
41 41 log = logging.getLogger(__name__)
42 42
43 43 # define max context, a file with more than this number of lines is unusable
44 44 # in the browser anyway
45 45 MAX_CONTEXT = 1024 * 1014
46 46
47 47
48 48 class OPS(object):
49 49 ADD = 'A'
50 50 MOD = 'M'
51 51 DEL = 'D'
52 52
53 53
54 54 def wrap_to_table(str_):
55 55 return '''<table class="code-difftable">
56 56 <tr class="line no-comment">
57 57 <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
58 58 <td></td>
59 59 <td class="lineno new"></td>
60 60 <td class="code no-comment"><pre>%s</pre></td>
61 61 </tr>
62 62 </table>''' % (_('Click to comment'), str_)
63 63
64 64
65 65 def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
66 66 show_full_diff=False, ignore_whitespace=True, line_context=3,
67 67 enable_comments=False):
68 68 """
69 69 returns a diff wrapped into a table; checks cut_off_limit for the file
70 70 and the whole diff, and presents a proper message when it is exceeded
71 71 """
72 72
73 73 if filenode_old is None:
74 74 filenode_old = FileNode(filenode_new.path, '', EmptyCommit())
75 75
76 76 if filenode_old.is_binary or filenode_new.is_binary:
77 77 diff = wrap_to_table(_('Binary file'))
78 78 stats = None
79 79 size = 0
80 80 data = None
81 81
82 82 elif diff_limit != -1 and (diff_limit is None or
83 83 (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
84 84
85 85 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
86 86 ignore_whitespace=ignore_whitespace,
87 87 context=line_context)
88 88 diff_processor = DiffProcessor(
89 89 f_gitdiff, format='gitdiff', diff_limit=diff_limit,
90 90 file_limit=file_limit, show_full_diff=show_full_diff)
91 91 _parsed = diff_processor.prepare()
92 92
93 93 diff = diff_processor.as_html(enable_comments=enable_comments)
94 94 stats = _parsed[0]['stats'] if _parsed else None
95 95 size = len(diff or '')
96 96 data = _parsed[0] if _parsed else None
97 97 else:
98 98 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
99 99 'diff menu to display this diff'))
100 100 stats = None
101 101 size = 0
102 102 data = None
103 103 if not diff:
104 104 submodules = filter(lambda o: isinstance(o, SubModuleNode),
105 105 [filenode_new, filenode_old])
106 106 if submodules:
107 107 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
108 108 else:
109 109 diff = wrap_to_table(_('No changes detected'))
110 110
111 111 cs1 = filenode_old.commit.raw_id
112 112 cs2 = filenode_new.commit.raw_id
113 113
114 114 return size, cs1, cs2, diff, stats, data
115 115
116 116
117 117 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
118 118 """
119 119 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
120 120
121 121 :param ignore_whitespace: ignore whitespace in the diff
122 122 """
123 123 # make sure we pass in default context
124 124 context = context or 3
125 125 # protect against IntOverflow when passing HUGE context
126 126 if context > MAX_CONTEXT:
127 127 context = MAX_CONTEXT
128 128
129 129 submodules = filter(lambda o: isinstance(o, SubModuleNode),
130 130 [filenode_new, filenode_old])
131 131 if submodules:
132 132 return ''
133 133
134 134 for filenode in (filenode_old, filenode_new):
135 135 if not isinstance(filenode, FileNode):
136 136 raise VCSError(
137 137 "Given object should be FileNode object, not %s"
138 138 % filenode.__class__)
139 139
140 140 repo = filenode_new.commit.repository
141 141 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
142 142 new_commit = filenode_new.commit
143 143
144 144 vcs_gitdiff = repo.get_diff(
145 145 old_commit, new_commit, filenode_new.path,
146 146 ignore_whitespace, context, path1=filenode_old.path)
147 147 return vcs_gitdiff
148 148
149 149 NEW_FILENODE = 1
150 150 DEL_FILENODE = 2
151 151 MOD_FILENODE = 3
152 152 RENAMED_FILENODE = 4
153 153 COPIED_FILENODE = 5
154 154 CHMOD_FILENODE = 6
155 155 BIN_FILENODE = 7
156 156
157 157
158 158 class LimitedDiffContainer(object):
159 159
160 160 def __init__(self, diff_limit, cur_diff_size, diff):
161 161 self.diff = diff
162 162 self.diff_limit = diff_limit
163 163 self.cur_diff_size = cur_diff_size
164 164
165 165 def __getitem__(self, key):
166 166 return self.diff.__getitem__(key)
167 167
168 168 def __iter__(self):
169 169 for l in self.diff:
170 170 yield l
171 171
172 172
173 173 class Action(object):
174 174 """
175 175 Contains constants for the action value of the lines in a parsed diff.
176 176 """
177 177
178 178 ADD = 'add'
179 179 DELETE = 'del'
180 180 UNMODIFIED = 'unmod'
181 181
182 182 CONTEXT = 'context'
183 CONTEXT_OLD = 'context-old'
184 CONTEXT_NEW = 'context-new'
183 OLD_NO_NL = 'old-no-nl'
184 NEW_NO_NL = 'new-no-nl'
185 185
186 186
187 187 class DiffProcessor(object):
188 188 """
189 189 Give it a unified or git diff and it returns a list of the files that were
190 190 mentioned in the diff together with a dict of meta information that
191 191 can be used to render it in an HTML template.
192 192
193 193 .. note:: Unicode handling
194 194
195 195 The original diffs are a byte sequence and can contain filenames
196 196 in mixed encodings. This class generally returns `unicode` objects
197 197 since the result is intended for presentation to the user.
198 198
199 199 """
200 200 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
201 201 _newline_marker = re.compile(r'^\\ No newline at end of file')
202 202
203 203 # used for inline highlighter word split
204 204 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
205 205
206 206 def __init__(self, diff, format='gitdiff', diff_limit=None,
207 207 file_limit=None, show_full_diff=True):
208 208 """
209 209 :param diff: A `Diff` object representing a diff from a vcs backend
210 210 :param format: format of diff passed, `udiff` or `gitdiff`
211 211 :param diff_limit: defines the size at which a diff is considered
212 212 "big"; beyond it the cut-off is triggered. Set to None to always
213 213 show the full diff
214 214 """
215 215 self._diff = diff
216 216 self._format = format
217 217 self.adds = 0
218 218 self.removes = 0
219 219 # calculate diff size
220 220 self.diff_limit = diff_limit
221 221 self.file_limit = file_limit
222 222 self.show_full_diff = show_full_diff
223 223 self.cur_diff_size = 0
224 224 self.parsed = False
225 225 self.parsed_diff = []
226 226
227 227 if format == 'gitdiff':
228 228 self.differ = self._highlight_line_difflib
229 229 self._parser = self._parse_gitdiff
230 230 else:
231 231 self.differ = self._highlight_line_udiff
232 232 self._parser = self._new_parse_gitdiff
233 233
234 234 def _copy_iterator(self):
235 235 """
236 236 make a fresh copy of the generator; we should not iterate through
237 237 the original, as it is needed for repeated operations on
238 238 this instance of DiffProcessor
239 239 """
240 240 self.__udiff, iterator_copy = tee(self.__udiff)
241 241 return iterator_copy
242 242
243 243 def _escaper(self, string):
244 244 """
245 245 Escaper for diffs: escapes special chars and checks the diff limit
246 246
247 247 :param string:
248 248 """
249 249
250 250 self.cur_diff_size += len(string)
251 251
252 252 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
253 253 raise DiffLimitExceeded('Diff Limit Exceeded')
254 254
255 255 return safe_unicode(string)\
256 256 .replace('&', '&amp;')\
257 257 .replace('<', '&lt;')\
258 258 .replace('>', '&gt;')
259 259
260 260 def _line_counter(self, l):
261 261 """
262 262 Checks each line and bumps total adds/removes for this diff
263 263
264 264 :param l:
265 265 """
266 266 if l.startswith('+') and not l.startswith('+++'):
267 267 self.adds += 1
268 268 elif l.startswith('-') and not l.startswith('---'):
269 269 self.removes += 1
270 270 return safe_unicode(l)
271 271
272 272 def _highlight_line_difflib(self, line, next_):
273 273 """
274 274 Highlight inline changes in both lines.
275 275 """
276 276
277 277 if line['action'] == Action.DELETE:
278 278 old, new = line, next_
279 279 else:
280 280 old, new = next_, line
281 281
282 282 oldwords = self._token_re.split(old['line'])
283 283 newwords = self._token_re.split(new['line'])
284 284 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
285 285
286 286 oldfragments, newfragments = [], []
287 287 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
288 288 oldfrag = ''.join(oldwords[i1:i2])
289 289 newfrag = ''.join(newwords[j1:j2])
290 290 if tag != 'equal':
291 291 if oldfrag:
292 292 oldfrag = '<del>%s</del>' % oldfrag
293 293 if newfrag:
294 294 newfrag = '<ins>%s</ins>' % newfrag
295 295 oldfragments.append(oldfrag)
296 296 newfragments.append(newfrag)
297 297
298 298 old['line'] = "".join(oldfragments)
299 299 new['line'] = "".join(newfragments)
300 300
301 301 def _highlight_line_udiff(self, line, next_):
302 302 """
303 303 Highlight inline changes in both lines.
304 304 """
305 305 start = 0
306 306 limit = min(len(line['line']), len(next_['line']))
307 307 while start < limit and line['line'][start] == next_['line'][start]:
308 308 start += 1
309 309 end = -1
310 310 limit -= start
311 311 while -end <= limit and line['line'][end] == next_['line'][end]:
312 312 end -= 1
313 313 end += 1
314 314 if start or end:
315 315 def do(l):
316 316 last = end + len(l['line'])
317 317 if l['action'] == Action.ADD:
318 318 tag = 'ins'
319 319 else:
320 320 tag = 'del'
321 321 l['line'] = '%s<%s>%s</%s>%s' % (
322 322 l['line'][:start],
323 323 tag,
324 324 l['line'][start:last],
325 325 tag,
326 326 l['line'][last:]
327 327 )
328 328 do(line)
329 329 do(next_)
330 330
331 331 def _clean_line(self, line, command):
332 332 if command in ['+', '-', ' ']:
333 333 # only modify the line if it's actually part of the diff body
334 334 line = line[1:]
335 335 return line
336 336
337 337 def _parse_gitdiff(self, inline_diff=True):
338 338 _files = []
339 339 diff_container = lambda arg: arg
340 340
341 341 for chunk in self._diff.chunks():
342 342 head = chunk.header
343 343
344 344 diff = imap(self._escaper, chunk.diff.splitlines(1))
345 345 raw_diff = chunk.raw
346 346 limited_diff = False
347 347 exceeds_limit = False
348 348
349 349 op = None
350 350 stats = {
351 351 'added': 0,
352 352 'deleted': 0,
353 353 'binary': False,
354 354 'ops': {},
355 355 }
356 356
357 357 if head['deleted_file_mode']:
358 358 op = OPS.DEL
359 359 stats['binary'] = True
360 360 stats['ops'][DEL_FILENODE] = 'deleted file'
361 361
362 362 elif head['new_file_mode']:
363 363 op = OPS.ADD
364 364 stats['binary'] = True
365 365 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
366 366 else: # modify operation, can be copy, rename or chmod
367 367
368 368 # CHMOD
369 369 if head['new_mode'] and head['old_mode']:
370 370 op = OPS.MOD
371 371 stats['binary'] = True
372 372 stats['ops'][CHMOD_FILENODE] = (
373 373 'modified file chmod %s => %s' % (
374 374 head['old_mode'], head['new_mode']))
375 375 # RENAME
376 376 if head['rename_from'] != head['rename_to']:
377 377 op = OPS.MOD
378 378 stats['binary'] = True
379 379 stats['ops'][RENAMED_FILENODE] = (
380 380 'file renamed from %s to %s' % (
381 381 head['rename_from'], head['rename_to']))
382 382 # COPY
383 383 if head.get('copy_from') and head.get('copy_to'):
384 384 op = OPS.MOD
385 385 stats['binary'] = True
386 386 stats['ops'][COPIED_FILENODE] = (
387 387 'file copied from %s to %s' % (
388 388 head['copy_from'], head['copy_to']))
389 389
390 390 # If our new parsed headers didn't match anything fall back to
391 391 # old style detection
392 392 if op is None:
393 393 if not head['a_file'] and head['b_file']:
394 394 op = OPS.ADD
395 395 stats['binary'] = True
396 396 stats['ops'][NEW_FILENODE] = 'new file'
397 397
398 398 elif head['a_file'] and not head['b_file']:
399 399 op = OPS.DEL
400 400 stats['binary'] = True
401 401 stats['ops'][DEL_FILENODE] = 'deleted file'
402 402
403 403 # it's neither ADD nor DELETE
404 404 if op is None:
405 405 op = OPS.MOD
406 406 stats['binary'] = True
407 407 stats['ops'][MOD_FILENODE] = 'modified file'
408 408
409 409 # a real non-binary diff
410 410 if head['a_file'] or head['b_file']:
411 411 try:
412 412 raw_diff, chunks, _stats = self._parse_lines(diff)
413 413 stats['binary'] = False
414 414 stats['added'] = _stats[0]
415 415 stats['deleted'] = _stats[1]
416 416 # explicit mark that it's a modified file
417 417 if op == OPS.MOD:
418 418 stats['ops'][MOD_FILENODE] = 'modified file'
419 419 exceeds_limit = len(raw_diff) > self.file_limit
420 420
421 421 # changed from _escaper function so we validate size of
422 422 # each file instead of the whole diff
423 423 # diff will hide big files but still show small ones
424 424 # from my tests, big files are fairly safe to be parsed
425 425 # but the browser is the bottleneck
426 426 if not self.show_full_diff and exceeds_limit:
427 427 raise DiffLimitExceeded('File Limit Exceeded')
428 428
429 429 except DiffLimitExceeded:
430 430 diff_container = lambda _diff: \
431 431 LimitedDiffContainer(
432 432 self.diff_limit, self.cur_diff_size, _diff)
433 433
434 434 exceeds_limit = len(raw_diff) > self.file_limit
435 435 limited_diff = True
436 436 chunks = []
437 437
438 438 else: # GIT format binary patch, or possibly empty diff
439 439 if head['bin_patch']:
440 440 # the operation is already extracted, we simply mark that
441 441 # it's a binary diff we won't show
442 442 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
443 443 chunks = []
444 444
445 445 if chunks and not self.show_full_diff and op == OPS.DEL:
446 446 # if not in full diff mode, hide the deleted file contents
447 447 # TODO: anderson: if the view is not too big, there is no way
448 448 # to see the content of the file
449 449 chunks = []
450 450
451 451 chunks.insert(0, [{
452 452 'old_lineno': '',
453 453 'new_lineno': '',
454 454 'action': Action.CONTEXT,
455 455 'line': msg,
456 456 } for _op, msg in stats['ops'].iteritems()
457 457 if _op not in [MOD_FILENODE]])
458 458
459 459 _files.append({
460 460 'filename': safe_unicode(head['b_path']),
461 461 'old_revision': head['a_blob_id'],
462 462 'new_revision': head['b_blob_id'],
463 463 'chunks': chunks,
464 464 'raw_diff': safe_unicode(raw_diff),
465 465 'operation': op,
466 466 'stats': stats,
467 467 'exceeds_limit': exceeds_limit,
468 468 'is_limited_diff': limited_diff,
469 469 })
470 470
471 471 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
472 472 OPS.DEL: 2}.get(info['operation'])
473 473
474 474 if not inline_diff:
475 475 return diff_container(sorted(_files, key=sorter))
476 476
477 477 # highlight inline changes
478 478 for diff_data in _files:
479 479 for chunk in diff_data['chunks']:
480 480 lineiter = iter(chunk)
481 481 try:
482 482 while 1:
483 483 line = lineiter.next()
484 484 if line['action'] not in (
485 485 Action.UNMODIFIED, Action.CONTEXT):
486 486 nextline = lineiter.next()
487 487 if nextline['action'] in ['unmod', 'context'] or \
488 488 nextline['action'] == line['action']:
489 489 continue
490 490 self.differ(line, nextline)
491 491 except StopIteration:
492 492 pass
493 493
494 494 return diff_container(sorted(_files, key=sorter))
495 495
496 496
497 497 # FIXME: NEWDIFFS: dan: this replaces the old _escaper function
498 498 def _process_line(self, string):
499 499 """
500 500 Process a diff line and check the diff limit
501 501
502 502 :param string:
503 503 """
504 504
505 505 self.cur_diff_size += len(string)
506 506
507 507 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
508 508 raise DiffLimitExceeded('Diff Limit Exceeded')
509 509
510 510 return safe_unicode(string)
511 511
512 512 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
513 513 def _new_parse_gitdiff(self, inline_diff=True):
514 514 _files = []
515 515 diff_container = lambda arg: arg
516 516 for chunk in self._diff.chunks():
517 517 head = chunk.header
518 518 log.debug('parsing diff %r' % head)
519 519
520 520 diff = imap(self._process_line, chunk.diff.splitlines(1))
521 521 raw_diff = chunk.raw
522 522 limited_diff = False
523 523 exceeds_limit = False
524 524 # if 'empty_file_to_modify_and_rename' in head['a_path']:
525 525 # 1/0
526 526 op = None
527 527 stats = {
528 528 'added': 0,
529 529 'deleted': 0,
530 530 'binary': False,
531 531 'old_mode': None,
532 532 'new_mode': None,
533 533 'ops': {},
534 534 }
535 535 if head['old_mode']:
536 536 stats['old_mode'] = head['old_mode']
537 537 if head['new_mode']:
538 538 stats['new_mode'] = head['new_mode']
539 539 if head['b_mode']:
540 540 stats['new_mode'] = head['b_mode']
541 541
542 542 if head['deleted_file_mode']:
543 543 op = OPS.DEL
544 544 stats['binary'] = True
545 545 stats['ops'][DEL_FILENODE] = 'deleted file'
546 546
547 547 elif head['new_file_mode']:
548 548 op = OPS.ADD
549 549 stats['binary'] = True
550 550 stats['old_mode'] = None
551 551 stats['new_mode'] = head['new_file_mode']
552 552 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
553 553 else: # modify operation, can be copy, rename or chmod
554 554
555 555 # CHMOD
556 556 if head['new_mode'] and head['old_mode']:
557 557 op = OPS.MOD
558 558 stats['binary'] = True
559 559 stats['ops'][CHMOD_FILENODE] = (
560 560 'modified file chmod %s => %s' % (
561 561 head['old_mode'], head['new_mode']))
562 562
563 563 # RENAME
564 564 if head['rename_from'] != head['rename_to']:
565 565 op = OPS.MOD
566 566 stats['binary'] = True
567 567 stats['renamed'] = (head['rename_from'], head['rename_to'])
568 568 stats['ops'][RENAMED_FILENODE] = (
569 569 'file renamed from %s to %s' % (
570 570 head['rename_from'], head['rename_to']))
571 571 # COPY
572 572 if head.get('copy_from') and head.get('copy_to'):
573 573 op = OPS.MOD
574 574 stats['binary'] = True
575 575 stats['copied'] = (head['copy_from'], head['copy_to'])
576 576 stats['ops'][COPIED_FILENODE] = (
577 577 'file copied from %s to %s' % (
578 578 head['copy_from'], head['copy_to']))
579 579
580 580 # If our new parsed headers didn't match anything fall back to
581 581 # old style detection
582 582 if op is None:
583 583 if not head['a_file'] and head['b_file']:
584 584 op = OPS.ADD
585 585 stats['binary'] = True
586 586 stats['new_file'] = True
587 587 stats['ops'][NEW_FILENODE] = 'new file'
588 588
589 589 elif head['a_file'] and not head['b_file']:
590 590 op = OPS.DEL
591 591 stats['binary'] = True
592 592 stats['ops'][DEL_FILENODE] = 'deleted file'
593 593
594 594 # it's neither ADD nor DELETE
595 595 if op is None:
596 596 op = OPS.MOD
597 597 stats['binary'] = True
598 598 stats['ops'][MOD_FILENODE] = 'modified file'
599 599
600 600 # a real non-binary diff
601 601 if head['a_file'] or head['b_file']:
602 602 try:
603 603 raw_diff, chunks, _stats = self._new_parse_lines(diff)
604 604 stats['binary'] = False
605 605 stats['added'] = _stats[0]
606 606 stats['deleted'] = _stats[1]
607 607 # explicit mark that it's a modified file
608 608 if op == OPS.MOD:
609 609 stats['ops'][MOD_FILENODE] = 'modified file'
610 610 exceeds_limit = len(raw_diff) > self.file_limit
611 611
612 612 # changed from _escaper function so we validate size of
613 613 # each file instead of the whole diff
614 614 # diff will hide big files but still show small ones
615 615 # from my tests, big files are fairly safe to be parsed
616 616 # but the browser is the bottleneck
617 617 if not self.show_full_diff and exceeds_limit:
618 618 raise DiffLimitExceeded('File Limit Exceeded')
619 619
620 620 except DiffLimitExceeded:
621 621 diff_container = lambda _diff: \
622 622 LimitedDiffContainer(
623 623 self.diff_limit, self.cur_diff_size, _diff)
624 624
625 625 exceeds_limit = len(raw_diff) > self.file_limit
626 626 limited_diff = True
627 627 chunks = []
628 628
629 629 else: # GIT format binary patch, or possibly empty diff
630 630 if head['bin_patch']:
631 631 # the operation is already extracted, we simply mark that
632 632 # it's a binary diff we won't show
633 633 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
634 634 chunks = []
635 635
636 636 if chunks and not self.show_full_diff and op == OPS.DEL:
637 637 # if not in full diff mode, hide the deleted file contents
638 638 # TODO: anderson: if the view is not too big, there is no way
639 639 # to see the content of the file
640 640 chunks = []
641 641
642 642 chunks.insert(0, [{
643 643 'old_lineno': '',
644 644 'new_lineno': '',
645 645 'action': Action.CONTEXT,
646 646 'line': msg,
647 647 } for _op, msg in stats['ops'].iteritems()
648 648 if _op not in [MOD_FILENODE]])
649 649
650 650 original_filename = safe_unicode(head['a_path'])
651 651 _files.append({
652 652 'original_filename': original_filename,
653 653 'filename': safe_unicode(head['b_path']),
654 654 'old_revision': head['a_blob_id'],
655 655 'new_revision': head['b_blob_id'],
656 656 'chunks': chunks,
657 657 'raw_diff': safe_unicode(raw_diff),
658 658 'operation': op,
659 659 'stats': stats,
660 660 'exceeds_limit': exceeds_limit,
661 661 'is_limited_diff': limited_diff,
662 662 })
663 663
664 664
665 665 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
666 666 OPS.DEL: 2}.get(info['operation'])
667 667
668 668 return diff_container(sorted(_files, key=sorter))
669 669
670 670 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
671 671 def _parse_lines(self, diff):
672 672 """
673 673 Parse the diff and return data for the template.
674 674 """
675 675
676 676 lineiter = iter(diff)
677 677 stats = [0, 0]
678 678 chunks = []
679 679 raw_diff = []
680 680
681 681 try:
682 682 line = lineiter.next()
683 683
684 684 while line:
685 685 raw_diff.append(line)
686 686 lines = []
687 687 chunks.append(lines)
688 688
689 689 match = self._chunk_re.match(line)
690 690
691 691 if not match:
692 692 break
693 693
694 694 gr = match.groups()
695 695 (old_line, old_end,
696 696 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
697 697 old_line -= 1
698 698 new_line -= 1
699 699
700 700 context = len(gr) == 5
701 701 old_end += old_line
702 702 new_end += new_line
703 703
704 704 if context:
705 705 # skip context only if it's the first line
706 706 if int(gr[0]) > 1:
707 707 lines.append({
708 708 'old_lineno': '...',
709 709 'new_lineno': '...',
710 710 'action': Action.CONTEXT,
711 711 'line': line,
712 712 })
713 713
714 714 line = lineiter.next()
715 715
716 716 while old_line < old_end or new_line < new_end:
717 717 command = ' '
718 718 if line:
719 719 command = line[0]
720 720
721 721 affects_old = affects_new = False
722 722
723 723 # ignore those if we don't expect them
724 724 if command in '#@':
725 725 continue
726 726 elif command == '+':
727 727 affects_new = True
728 728 action = Action.ADD
729 729 stats[0] += 1
730 730 elif command == '-':
731 731 affects_old = True
732 732 action = Action.DELETE
733 733 stats[1] += 1
734 734 else:
735 735 affects_old = affects_new = True
736 736 action = Action.UNMODIFIED
737 737
738 738 if not self._newline_marker.match(line):
739 739 old_line += affects_old
740 740 new_line += affects_new
741 741 lines.append({
742 742 'old_lineno': affects_old and old_line or '',
743 743 'new_lineno': affects_new and new_line or '',
744 744 'action': action,
745 745 'line': self._clean_line(line, command)
746 746 })
747 747 raw_diff.append(line)
748 748
749 749 line = lineiter.next()
750 750
751 751 if self._newline_marker.match(line):
752 752 # we need to append to lines, since this is not
753 753 # counted in the line specs of the diff
754 754 lines.append({
755 755 'old_lineno': '...',
756 756 'new_lineno': '...',
757 757 'action': Action.CONTEXT,
758 758 'line': self._clean_line(line, command)
759 759 })
760 760
761 761 except StopIteration:
762 762 pass
763 763 return ''.join(raw_diff), chunks, stats
764 764
765 765 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
766 766 def _new_parse_lines(self, diff):
767 767 """
768 768 Parse the diff and return data for the template.
769 769 """
770 770
771 771 lineiter = iter(diff)
772 772 stats = [0, 0]
773 773 chunks = []
774 774 raw_diff = []
775 775
776 776 try:
777 777 line = lineiter.next()
778 778
779 779 while line:
780 780 raw_diff.append(line)
781 781 match = self._chunk_re.match(line)
782 782
783 783 if not match:
784 784 break
785 785
786 786 gr = match.groups()
787 787 (old_line, old_end,
788 788 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
789 789
790 790 lines = []
791 791 hunk = {
792 792 'section_header': gr[-1],
793 793 'source_start': old_line,
794 794 'source_length': old_end,
795 795 'target_start': new_line,
796 796 'target_length': new_end,
797 797 'lines': lines,
798 798 }
799 799 chunks.append(hunk)
800 800
801 801 old_line -= 1
802 802 new_line -= 1
803 803
804 804 context = len(gr) == 5
805 805 old_end += old_line
806 806 new_end += new_line
807 807
808 808 line = lineiter.next()
809 809
810 810 while old_line < old_end or new_line < new_end:
811 811 command = ' '
812 812 if line:
813 813 command = line[0]
814 814
815 815 affects_old = affects_new = False
816 816
817 817 # ignore those if we don't expect them
818 818 if command in '#@':
819 819 continue
820 820 elif command == '+':
821 821 affects_new = True
822 822 action = Action.ADD
823 823 stats[0] += 1
824 824 elif command == '-':
825 825 affects_old = True
826 826 action = Action.DELETE
827 827 stats[1] += 1
828 828 else:
829 829 affects_old = affects_new = True
830 830 action = Action.UNMODIFIED
831 831
832 832 if not self._newline_marker.match(line):
833 833 old_line += affects_old
834 834 new_line += affects_new
835 835 lines.append({
836 836 'old_lineno': affects_old and old_line or '',
837 837 'new_lineno': affects_new and new_line or '',
838 838 'action': action,
839 839 'line': self._clean_line(line, command)
840 840 })
841 841 raw_diff.append(line)
842 842
843 843 line = lineiter.next()
844 844
845 845 if self._newline_marker.match(line):
846 846 # we need to append to lines, since this is not
847 847 # counted in the line specs of the diff
848 848 if affects_old:
849 action = Action.CONTEXT_OLD
849 action = Action.OLD_NO_NL
850 850 elif affects_new:
851 action = Action.CONTEXT_NEW
851 action = Action.NEW_NO_NL
852 852 else:
853 853 raise Exception('invalid context for no newline')
854 854
855 855 lines.append({
856 856 'old_lineno': None,
857 857 'new_lineno': None,
858 858 'action': action,
859 859 'line': self._clean_line(line, command)
860 860 })
861 861
862 862 except StopIteration:
863 863 pass
864 864 return ''.join(raw_diff), chunks, stats
865 865
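With the rename, a hunk whose old side lacks a trailing newline yields an
extra entry of roughly this shape (illustrative literal; names as in
_new_parse_lines above, with the leading backslash already stripped by
_clean_line):

no_nl_line = {
    'old_lineno': None,
    'new_lineno': None,
    'action': 'old-no-nl',  # Action.OLD_NO_NL; 'new-no-nl' when it follows a '+'
    'line': ' No newline at end of file',
}
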
866 866 def _safe_id(self, idstring):
867 867 """Make a string safe for including in an id attribute.
868 868
869 869 The HTML spec says that id attributes 'must begin with
870 870 a letter ([A-Za-z]) and may be followed by any number
871 871 of letters, digits ([0-9]), hyphens ("-"), underscores
872 872 ("_"), colons (":"), and periods (".")'. These regexps
873 873 are slightly over-zealous, in that they remove colons
874 874 and periods unnecessarily.
875 875
876 876 Whitespace is transformed into underscores, and then
877 877 anything which is not a hyphen or a character that
878 878 matches \w (alphanumerics and underscore) is removed.
879 879
880 880 """
881 881 # Transform all whitespace to underscore
882 882 idstring = re.sub(r'\s', "_", '%s' % idstring)
883 883 # Remove everything that is not a hyphen or a member of \w
884 884 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
885 885 return idstring
886 886
887 887 def prepare(self, inline_diff=True):
888 888 """
889 889 Prepare the passed udiff for HTML rendering.
890 890
891 891 :return: A list of dicts with diff information.
892 892 """
893 893 parsed = self._parser(inline_diff=inline_diff)
894 894 self.parsed = True
895 895 self.parsed_diff = parsed
896 896 return parsed
897 897
898 898 def as_raw(self, diff_lines=None):
899 899 """
900 900 Returns raw diff as a byte string
901 901 """
902 902 return self._diff.raw
903 903
904 904 def as_html(self, table_class='code-difftable', line_class='line',
905 905 old_lineno_class='lineno old', new_lineno_class='lineno new',
906 906 code_class='code', enable_comments=False, parsed_lines=None):
907 907 """
908 908 Return given diff as html table with customized css classes
909 909 """
910 910 def _link_to_if(condition, label, url):
911 911 """
912 912 Generates a link if the condition is met, or just the label if not.
913 913 """
914 914
915 915 if condition:
916 916 return '''<a href="%(url)s" class="tooltip"
917 917 title="%(title)s">%(label)s</a>''' % {
918 918 'title': _('Click to select line'),
919 919 'url': url,
920 920 'label': label
921 921 }
922 922 else:
923 923 return label
924 924 if not self.parsed:
925 925 self.prepare()
926 926
927 927 diff_lines = self.parsed_diff
928 928 if parsed_lines:
929 929 diff_lines = parsed_lines
930 930
931 931 _html_empty = True
932 932 _html = []
933 933 _html.append('''<table class="%(table_class)s">\n''' % {
934 934 'table_class': table_class
935 935 })
936 936
937 937 for diff in diff_lines:
938 938 for line in diff['chunks']:
939 939 _html_empty = False
940 940 for change in line:
941 941 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
942 942 'lc': line_class,
943 943 'action': change['action']
944 944 })
945 945 anchor_old_id = ''
946 946 anchor_new_id = ''
947 947 anchor_old = "%(filename)s_o%(oldline_no)s" % {
948 948 'filename': self._safe_id(diff['filename']),
949 949 'oldline_no': change['old_lineno']
950 950 }
951 951 anchor_new = "%(filename)s_n%(oldline_no)s" % {
952 952 'filename': self._safe_id(diff['filename']),
953 953 'oldline_no': change['new_lineno']
954 954 }
955 955 cond_old = (change['old_lineno'] != '...' and
956 956 change['old_lineno'])
957 957 cond_new = (change['new_lineno'] != '...' and
958 958 change['new_lineno'])
959 959 if cond_old:
960 960 anchor_old_id = 'id="%s"' % anchor_old
961 961 if cond_new:
962 962 anchor_new_id = 'id="%s"' % anchor_new
963 963
964 964 if change['action'] != Action.CONTEXT:
965 965 anchor_link = True
966 966 else:
967 967 anchor_link = False
968 968
969 969 ###########################################################
970 970 # COMMENT ICONS
971 971 ###########################################################
972 972 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
973 973
974 974 if enable_comments and change['action'] != Action.CONTEXT:
975 975 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
976 976
977 977 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
978 978
979 979 ###########################################################
980 980 # OLD LINE NUMBER
981 981 ###########################################################
982 982 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
983 983 'a_id': anchor_old_id,
984 984 'olc': old_lineno_class
985 985 })
986 986
987 987 _html.append('''%(link)s''' % {
988 988 'link': _link_to_if(anchor_link, change['old_lineno'],
989 989 '#%s' % anchor_old)
990 990 })
991 991 _html.append('''</td>\n''')
992 992 ###########################################################
993 993 # NEW LINE NUMBER
994 994 ###########################################################
995 995
996 996 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
997 997 'a_id': anchor_new_id,
998 998 'nlc': new_lineno_class
999 999 })
1000 1000
1001 1001 _html.append('''%(link)s''' % {
1002 1002 'link': _link_to_if(anchor_link, change['new_lineno'],
1003 1003 '#%s' % anchor_new)
1004 1004 })
1005 1005 _html.append('''</td>\n''')
1006 1006 ###########################################################
1007 1007 # CODE
1008 1008 ###########################################################
1009 1009 code_classes = [code_class]
1010 1010 if (not enable_comments or
1011 1011 change['action'] == Action.CONTEXT):
1012 1012 code_classes.append('no-comment')
1013 1013 _html.append('\t<td class="%s">' % ' '.join(code_classes))
1014 1014 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
1015 1015 'code': change['line']
1016 1016 })
1017 1017
1018 1018 _html.append('''\t</td>''')
1019 1019 _html.append('''\n</tr>\n''')
1020 1020 _html.append('''</table>''')
1021 1021 if _html_empty:
1022 1022 return None
1023 1023 return ''.join(_html)
1024 1024
1025 1025 def stat(self):
1026 1026 """
1027 1027 Returns tuple of added, and removed lines for this instance
1028 1028 """
1029 1029 return self.adds, self.removes
1030 1030
1031 1031 def get_context_of_line(
1032 1032 self, path, diff_line=None, context_before=3, context_after=3):
1033 1033 """
1034 1034 Returns the context lines for the specified diff line.
1035 1035
1036 1036 :type diff_line: :class:`DiffLineNumber`
1037 1037 """
1038 1038 assert self.parsed, "DiffProcessor is not initialized."
1039 1039
1040 1040 if None not in diff_line:
1041 1041 raise ValueError(
1042 1042 "Cannot specify both line numbers: {}".format(diff_line))
1043 1043
1044 1044 file_diff = self._get_file_diff(path)
1045 1045 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1046 1046
1047 1047 first_line_to_include = max(idx - context_before, 0)
1048 1048 first_line_after_context = idx + context_after + 1
1049 1049 context_lines = chunk[first_line_to_include:first_line_after_context]
1050 1050
1051 1051 line_contents = [
1052 1052 _context_line(line) for line in context_lines
1053 1053 if _is_diff_content(line)]
1054 1054 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1055 1055 # Once they are fixed, we can drop this line here.
1056 1056 if line_contents:
1057 1057 line_contents[-1] = (
1058 1058 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1059 1059 return line_contents
1060 1060
1061 1061 def find_context(self, path, context, offset=0):
1062 1062 """
1063 1063 Finds the given `context` inside of the diff.
1064 1064
1065 1065 Use the parameter `offset` to specify which offset the target line has
1066 1066 inside of the given `context`. This way the correct diff line will be
1067 1067 returned.
1068 1068
1069 1069 :param offset: Shall be used to specify the offset of the main line
1070 1070 within the given `context`.
1071 1071 """
1072 1072 if offset < 0 or offset >= len(context):
1073 1073 raise ValueError(
1074 1074 "Only positive values up to the length of the context "
1075 1075 "minus one are allowed.")
1076 1076
1077 1077 matches = []
1078 1078 file_diff = self._get_file_diff(path)
1079 1079
1080 1080 for chunk in file_diff['chunks']:
1081 1081 context_iter = iter(context)
1082 1082 for line_idx, line in enumerate(chunk):
1083 1083 try:
1084 1084 if _context_line(line) == context_iter.next():
1085 1085 continue
1086 1086 except StopIteration:
1087 1087 matches.append((line_idx, chunk))
1088 1088 context_iter = iter(context)
1089 1089
1090 1090 # Increment position and trigger StopIteration
1091 1091 # if we had a match at the end
1092 1092 line_idx += 1
1093 1093 try:
1094 1094 context_iter.next()
1095 1095 except StopIteration:
1096 1096 matches.append((line_idx, chunk))
1097 1097
1098 1098 effective_offset = len(context) - offset
1099 1099 found_at_diff_lines = [
1100 1100 _line_to_diff_line_number(chunk[idx - effective_offset])
1101 1101 for idx, chunk in matches]
1102 1102
1103 1103 return found_at_diff_lines
1104 1104
1105 1105 def _get_file_diff(self, path):
1106 1106 for file_diff in self.parsed_diff:
1107 1107 if file_diff['filename'] == path:
1108 1108 break
1109 1109 else:
1110 1110 raise FileNotInDiffException("File {} not in diff".format(path))
1111 1111 return file_diff
1112 1112
1113 1113 def _find_chunk_line_index(self, file_diff, diff_line):
1114 1114 for chunk in file_diff['chunks']:
1115 1115 for idx, line in enumerate(chunk):
1116 1116 if line['old_lineno'] == diff_line.old:
1117 1117 return chunk, idx
1118 1118 if line['new_lineno'] == diff_line.new:
1119 1119 return chunk, idx
1120 1120 raise LineNotInDiffException(
1121 1121 "The line {} is not part of the diff.".format(diff_line))
1122 1122
1123 1123
1124 1124 def _is_diff_content(line):
1125 1125 return line['action'] in (
1126 1126 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1127 1127
1128 1128
1129 1129 def _context_line(line):
1130 1130 return (line['action'], line['line'])
1131 1131
1132 1132
1133 1133 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1134 1134
1135 1135
1136 1136 def _line_to_diff_line_number(line):
1137 1137 new_line_no = line['new_lineno'] or None
1138 1138 old_line_no = line['old_lineno'] or None
1139 1139 return DiffLineNumber(old=old_line_no, new=new_line_no)
1140 1140
1141 1141
1142 1142 class FileNotInDiffException(Exception):
1143 1143 """
1144 1144 Raised when the context for a missing file is requested.
1145 1145
1146 1146 If you request the context for a line in a file which is not part of the
1147 1147 given diff, then this exception is raised.
1148 1148 """
1149 1149
1150 1150
1151 1151 class LineNotInDiffException(Exception):
1152 1152 """
1153 1153 Raised when the context for a missing line is requested.
1154 1154
1155 1155 If you request the context for a line in a file and this line is not
1156 1156 part of the given diff, then this exception is raised.
1157 1157 """
1158 1158
1159 1159
1160 1160 class DiffLimitExceeded(Exception):
1161 1161 pass