##// END OF EJS Templates
diffs: limit the file context to ~1mln lines. Fixes #4184...
marcink -
r679:df6d63d7 stable
parent child Browse files
Show More
@@ -1,877 +1,885 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2016 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import collections
27 27 import re
28 28 import difflib
29 29 import logging
30 30
31 31 from itertools import tee, imap
32 32
33 33 from pylons.i18n.translation import _
34 34
35 35 from rhodecode.lib.vcs.exceptions import VCSError
36 36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 37 from rhodecode.lib.vcs.backends.base import EmptyCommit
38 38 from rhodecode.lib.helpers import escape
39 39 from rhodecode.lib.utils2 import safe_unicode
40 40
41 41 log = logging.getLogger(__name__)
42 42
# Upper bound for the number of context lines requested from the vcs backend.
# A file with more lines than this is unusable in a browser anyway, and
# capping the value protects the backend from integer overflows when a HUGE
# context is passed in. 1024 * 1024 is roughly one million lines.
MAX_CONTEXT = 1024 * 1024
46
43 47
class OPS(object):
    """
    One-letter codes describing what happened to a file in a diff.
    """

    ADD, MOD, DEL = 'A', 'M', 'D'
48 52
49 53
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a single-row diff table.

    The row uses the same css classes as a regular rendered diff, so short
    messages can be displayed in place of a real diff.

    :param str_: text placed inside the ``<pre>`` element of the row; it is
        inserted as-is, no escaping is applied here
    """
    template = (
        '<table class="code-difftable">\n'
        '<tr class="line no-comment">\n'
        '<td class="add-comment-line tooltip" title="%s">'
        '<span class="add-comment-content"></span></td>\n'
        '<td class="lineno new"></td>\n'
        '<td class="code no-comment"><pre>%s</pre></td>\n'
        '</tr>\n'
        '</table>')
    tooltip = _('Click to comment')
    return template % (tooltip, str_)
58 62
59 63
def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                 show_full_diff=False, ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Render the diff between two file nodes as an html table.

    Binary files and diffs that are over the limit are replaced by a short
    message wrapped into the same kind of table.

    :param filenode_old: old file node; ``None`` is replaced by an empty
        node placed at the path of ``filenode_new``
    :param filenode_new: new file node
    :param diff_limit: size limit; ``None`` disables the size check, ``-1``
        always produces the "cut off" message
    :param file_limit: passed through to :class:`DiffProcessor`
    :param show_full_diff: passed through to :class:`DiffProcessor`
    :param ignore_whitespace: passed through to :func:`get_gitdiff`
    :param line_context: number of context lines passed to
        :func:`get_gitdiff`
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :return: tuple ``(size, cs1, cs2, diff, stats, data)``
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyCommit())

    # values reported by every branch which does not render a real diff
    stats, size, data = None, 0, None

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))

    elif diff_limit == -1 or not (
            diff_limit is None or (
                filenode_old.size < diff_limit and
                filenode_new.size < diff_limit)):
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    else:
        git_diff = get_gitdiff(
            filenode_old, filenode_new,
            ignore_whitespace=ignore_whitespace, context=line_context)
        processor = DiffProcessor(
            git_diff, format='gitdiff', diff_limit=diff_limit,
            file_limit=file_limit, show_full_diff=show_full_diff)
        parsed_files = processor.prepare()

        diff = processor.as_html(enable_comments=enable_comments)
        size = len(diff or '')
        if parsed_files:
            data = parsed_files[0]
            stats = data['stats']

    if not diff:
        # nothing was rendered: either a submodule or no changes at all
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.commit.raw_id
    cs2 = filenode_new.commit.raw_id

    return size, cs1, cs2, diff, stats, data
110 114
111 115
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: old file node
    :param filenode_new: new file node, its commit's repository computes
        the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
        and the value is capped at ``MAX_CONTEXT``
    :return: diff as returned by the repository ``get_diff`` call, or an
        empty string if one of the nodes is a submodule
    :raises VCSError: if one of the nodes is not a ``FileNode``
    """
    # make sure we pass in the default context, and protect against
    # IntOverflow when passing HUGE context
    context = min(context or 3, MAX_CONTEXT)

    nodes = (filenode_new, filenode_old)
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for filenode in (filenode_old, filenode_new):
        if isinstance(filenode, FileNode):
            continue
        raise VCSError(
            "Given object should be FileNode object, not %s"
            % filenode.__class__)

    new_commit = filenode_new.commit
    repo = new_commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT

    return repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
139 147
# Keys of the per-file ``stats['ops']`` mapping built by DiffProcessor; each
# one identifies a kind of operation detected for a file.
(
    NEW_FILENODE,
    DEL_FILENODE,
    MOD_FILENODE,
    RENAMED_FILENODE,
    COPIED_FILENODE,
    CHMOD_FILENODE,
    BIN_FILENODE,
) = range(1, 8)
147 155
148 156
class LimitedDiffContainer(object):
    """
    Wrapper around a parsed diff which was cut off because of a limit.

    Keeps the limit and the diff size next to the wrapped diff, and forwards
    item access and iteration to it.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        return iter(self.diff)
162 170
163 171
class Action(object):
    """
    Values stored under the ``action`` key of each line of a parsed diff.
    """

    # lines which are part of the file content
    ADD, DELETE, UNMODIFIED = 'add', 'del', 'unmod'

    # informational lines which are not part of the file content
    CONTEXT = 'context'
174 182
175 183
class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    .. note:: Unicode handling

       The original diffs are a byte sequence and can contain filenames
       in mixed encodings. This class generally returns `unicode` objects
       since the result is intended for presentation to the user.

    """
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    def __init__(self, diff, format='gitdiff', diff_limit=None,
                 file_limit=None, show_full_diff=True):
        """
        :param diff: A `Diff` object representing a diff from a vcs backend
        :param format: format of diff passed, `udiff` or `gitdiff`
        :param diff_limit: define the size of diff that is considered "big"
            based on that parameter cut off will be triggered, set to None
            to show full diff
        :param file_limit: same as `diff_limit`, but checked against the
            size of each single file; `None` disables the check
        :param show_full_diff: if true, none of the limits is enforced
        """
        self._diff = diff
        self._format = format
        self.adds = 0
        self.removes = 0
        # calculate diff size
        self.diff_limit = diff_limit
        self.file_limit = file_limit
        self.show_full_diff = show_full_diff
        self.cur_diff_size = 0
        self.parsed = False
        self.parsed_diff = []

        if format == 'gitdiff':
            self.differ = self._highlight_line_difflib
            self._parser = self._parse_gitdiff
        else:
            self.differ = self._highlight_line_udiff
            self._parser = self._parse_udiff

    def _copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        # NOTE(review): `self.__udiff` is never assigned anywhere in this
        # class, so calling this method raises AttributeError. It looks like
        # a leftover; confirm there are no callers before removing it.
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _escaper(self, string):
        """
        Escaper for diff escapes special chars and checks the diff limit

        :param string: one line of the diff
        :raises DiffLimitExceeded: if the accumulated size of all escaped
            lines is over `diff_limit` and the full diff was not requested
        """
        self.cur_diff_size += len(string)

        # `None` means "no limit". It has to be checked explicitly, on
        # Python 2 every number compares greater than None, which made the
        # diff always count as too big.
        if (not self.show_full_diff and self.diff_limit is not None
                and self.cur_diff_size > self.diff_limit):
            raise DiffLimitExceeded('Diff Limit Exceeded')

        return safe_unicode(string)\
            .replace('&', '&amp;')\
            .replace('<', '&lt;')\
            .replace('>', '&gt;')

    def _line_counter(self, l):
        """
        Checks each line and bumps total adds/removes for this diff

        :param l: one line of the diff
        """
        if l.startswith('+') and not l.startswith('+++'):
            self.adds += 1
        elif l.startswith('-') and not l.startswith('---'):
            self.removes += 1
        return safe_unicode(l)

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.

        The text of both lines is split into tokens and compared using
        difflib, fragments which differ are wrapped into `<del>` (old line)
        or `<ins>` (new line). Both line dicts are modified in place.
        """
        if line['action'] == Action.DELETE:
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = self._token_re.split(old['line'])
        newwords = self._token_re.split(new['line'])
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        The common prefix and suffix of both lines is kept, the part in
        between is wrapped into `<ins>` or `<del>`, depending on the action
        of each line. Both line dicts are modified in place.
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # length of the common suffix, as a negative offset from the end
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _clean_line(self, line, command):
        """
        Strip the leading diff marker from `line`, if `command` is one.
        """
        if command in ['+', '-', ' ']:
            # only modify the line if it's actually a diff thing
            line = line[1:]
        return line

    def _exceeds_file_limit(self, raw_diff):
        """
        Tell if `raw_diff` is longer than `file_limit`, `None` is no limit.
        """
        if self.file_limit is None:
            return False
        return len(raw_diff) > self.file_limit

    def _detect_operation(self, head, stats):
        """
        Work out the operation of one file based on its parsed header.

        A description of every detected operation is stored in
        ``stats['ops']``.

        :param head: parsed header of a single file of the diff
        :param stats: stats dict of that file, its `ops` get updated
        :return: one of the `OPS` constants
        """
        op = None
        ops = stats['ops']

        if head['deleted_file_mode']:
            op = OPS.DEL
            ops[DEL_FILENODE] = 'deleted file'

        elif head['new_file_mode']:
            op = OPS.ADD
            ops[NEW_FILENODE] = 'new file %s' % head['new_file_mode']

        else:  # modify operation, can be copy, rename or chmod
            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                ops[CHMOD_FILENODE] = (
                    'modified file chmod %s => %s' % (
                        head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                ops[RENAMED_FILENODE] = (
                    'file renamed from %s to %s' % (
                        head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                ops[COPIED_FILENODE] = (
                    'file copied from %s to %s' % (
                        head['copy_from'], head['copy_to']))

        # If our new parsed headers didn't match anything fallback to
        # old style detection
        if op is None:
            if not head['a_file'] and head['b_file']:
                op = OPS.ADD
                ops[NEW_FILENODE] = 'new file'

            elif head['a_file'] and not head['b_file']:
                op = OPS.DEL
                ops[DEL_FILENODE] = 'deleted file'

        # it's not ADD not DELETE
        if op is None:
            op = OPS.MOD
            ops[MOD_FILENODE] = 'modified file'

        return op

    def _highlight_inline_changes(self, files):
        """
        Run the inline highlighter on pairs of adjacent changed lines.

        A changed line is paired with the line which follows it, the pair
        is highlighted only if the second line is a change of the other kind.
        """
        unchanged = (Action.UNMODIFIED, Action.CONTEXT)

        for diff_data in files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while True:
                        line = next(lineiter)
                        if line['action'] in unchanged:
                            continue
                        nextline = next(lineiter)
                        if (nextline['action'] in unchanged or
                                nextline['action'] == line['action']):
                            continue
                        self.differ(line, nextline)
                except StopIteration:
                    pass

    def _parse_gitdiff(self, inline_diff=True):
        """
        Parse all files of the git diff passed to the constructor.

        :param inline_diff: if true, changes inside of modified lines are
            highlighted
        :return: list of dicts, one for each file, sorted by operation
            (added, modified, deleted). If a limit was hit for any of the
            files the list is wrapped into a `LimitedDiffContainer`.
        """
        _files = []
        limit_hit = False

        for chunk in self._diff.chunks():
            head = chunk.header

            # lazy, so that the limit check of `_escaper` triggers while
            # the lines are consumed inside of `_parse_lines`
            diff = (self._escaper(l) for l in chunk.diff.splitlines(1))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }
            op = self._detect_operation(head, stats)
            # treated as binary until the content was parsed as text
            stats['binary'] = True

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = self._exceeds_file_limit(raw_diff)

                    # changed from _escaper function so we validate size of
                    # each file instead of the whole diff
                    # diff will hide big files but still show small ones
                    # from my tests, big files are fairly safe to be parsed
                    # but the browser is the bottleneck
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    limit_hit = True
                    exceeds_limit = self._exceeds_file_limit(raw_diff)
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # first chunk describes the operations on the file, it is empty
            # for a plain modification
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
            } for _op, msg in stats['ops'].items()
                if _op != MOD_FILENODE])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        order = {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}
        result = sorted(_files, key=lambda info: order.get(info['operation']))

        if inline_diff:
            # highlight inline changes, line dicts are modified in place
            self._highlight_inline_changes(result)

        if limit_hit:
            return LimitedDiffContainer(
                self.diff_limit, self.cur_diff_size, result)
        return result

    def _parse_udiff(self, inline_diff=True):
        raise NotImplementedError()

    def _parse_lines(self, diff):
        """
        Parse the diff an return data for the template.

        :param diff: iterable of the lines of a single file diff, starting
            at its first hunk header
        :return: tuple of `(raw_diff, chunks, stats)`; `raw_diff` is the
            joined text of the parsed lines, `chunks` is a list with a list
            of line dicts for each hunk and `stats` is `[added, deleted]`
        """
        lineiter = iter(diff)
        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = next(lineiter)

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)
                if not match:
                    break

                gr = match.groups()
                # a missing count in the hunk header means 1
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1
                old_end += old_line
                new_end += new_line

                # skip context only if it's first line
                if int(gr[0]) > 1:
                    lines.append({
                        'old_lineno': '...',
                        'new_lineno': '...',
                        'action': Action.CONTEXT,
                        'line': line,
                    })

                line = next(lineiter)
                # defined up front, it is used after the loop even if the
                # hunk does not contain any counted line
                command = ' '

                while old_line < old_end or new_line < new_end:
                    command = line[0] if line else ' '

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        # move on to the next line, a bare `continue` would
                        # check the same line forever
                        line = next(lineiter)
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': (old_line or '') if affects_old else '',
                            'new_lineno': (new_line or '') if affects_new else '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = next(lineiter)

                if self._newline_marker.match(line):
                    # we need to append to lines, since this is not
                    # counted in the line specs of diff
                    lines.append({
                        'old_lineno': '...',
                        'new_lineno': '...',
                        'action': Action.CONTEXT,
                        'line': self._clean_line(line, command)
                    })

        except StopIteration:
            # end of the diff
            pass
        return ''.join(raw_diff), chunks, stats

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def prepare(self, inline_diff=True):
        """
        Prepare the passed udiff for HTML rendering.

        :return: A list of dicts with diff information.
        """
        parsed = self._parser(inline_diff=inline_diff)
        self.parsed = True
        self.parsed_diff = parsed
        return parsed

    def as_raw(self, diff_lines=None):
        """
        Returns raw diff as a byte string
        """
        return self._diff.raw

    def as_html(self, table_class='code-difftable', line_class='line',
                old_lineno_class='lineno old', new_lineno_class='lineno new',
                code_class='code', enable_comments=False, parsed_lines=None):
        """
        Return given diff as html table with customized css classes

        :param enable_comments: if true, the "add comment" icon is rendered
            for every line which is not a context line
        :param parsed_lines: render this parsed diff instead of the one
            stored by `prepare`
        :return: html of the table, or `None` if there is nothing to render
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """
            if not condition:
                return label
            return (
                '<a href="%(url)s" class="tooltip"\n'
                'title="%(title)s">%(label)s</a>') % {
                    'title': _('Click to select line'),
                    'url': url,
                    'label': label
                }

        if not self.parsed:
            self.prepare()

        diff_lines = parsed_lines or self.parsed_diff

        _html_empty = True
        _html = ['<table class="%s">\n' % (table_class,)]
        add = _html.append

        for diff in diff_lines:
            file_id = self._safe_id(diff['filename'])

            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    old_lineno = change['old_lineno']
                    new_lineno = change['new_lineno']
                    is_context = change['action'] == Action.CONTEXT

                    add('<tr class="%s %s">\n' % (
                        line_class, change['action']))

                    # id attributes are rendered only for real line numbers
                    anchor_old = "%s_o%s" % (file_id, old_lineno)
                    anchor_new = "%s_n%s" % (file_id, new_lineno)
                    anchor_old_id = ''
                    anchor_new_id = ''
                    if old_lineno != '...' and old_lineno:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if new_lineno != '...' and new_lineno:
                        anchor_new_id = 'id="%s"' % anchor_new

                    anchor_link = not is_context

                    ###########################################################
                    # COMMENT ICON
                    ###########################################################
                    add('\t<td class="add-comment-line">'
                        '<span class="add-comment-content">')
                    if enable_comments and not is_context:
                        add('<a href="#">'
                            '<span class="icon-comment-add"></span></a>')
                    add('</span></td>\n')

                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    add('\t<td %s class="%s">' % (
                        anchor_old_id, old_lineno_class))
                    add('%s' % (_link_to_if(
                        anchor_link, old_lineno, '#%s' % anchor_old),))
                    add('</td>\n')

                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################
                    add('\t<td %s class="%s">' % (
                        anchor_new_id, new_lineno_class))
                    add('%s' % (_link_to_if(
                        anchor_link, new_lineno, '#%s' % anchor_new),))
                    add('</td>\n')

                    ###########################################################
                    # CODE
                    ###########################################################
                    code_classes = [code_class]
                    if not enable_comments or is_context:
                        code_classes.append('no-comment')
                    add('\t<td class="%s">' % ' '.join(code_classes))
                    add('\n\t\t<pre>%s</pre>\n' % (change['line'],))

                    add('\t</td>')
                    add('\n</tr>\n')
        add('</table>')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes

    def get_context_of_line(
            self, path, diff_line=None, context_before=3, context_after=3):
        """
        Returns the context lines for the specified diff line.

        :type diff_line: :class:`DiffLineNumber`
        :param path: name of the file inside of the diff
        :param context_before: number of lines to include before the line
        :param context_after: number of lines to include after the line
        :return: list of `(action, line)` tuples
        :raises ValueError: if both line numbers of `diff_line` are set
        """
        assert self.parsed, "DiffProcessor is not initialized."

        if None not in diff_line:
            raise ValueError(
                "Cannot specify both line numbers: {}".format(diff_line))

        file_diff = self._get_file_diff(path)
        chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

        first_line_to_include = max(idx - context_before, 0)
        first_line_after_context = idx + context_after + 1
        context_lines = chunk[first_line_to_include:first_line_after_context]

        line_contents = [
            _context_line(line) for line in context_lines
            if _is_diff_content(line)]
        # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
        # Once they are fixed, we can drop this line here.
        if line_contents:
            line_contents[-1] = (
                line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
        return line_contents

    def find_context(self, path, context, offset=0):
        """
        Finds the given `context` inside of the diff.

        Use the parameter `offset` to specify which offset the target line has
        inside of the given `context`. This way the correct diff line will be
        returned.

        :param path: name of the file inside of the diff
        :param context: sequence of `(action, line)` items to look for
        :param offset: Shall be used to specify the offset of the main line
            within the given `context`.
        :return: list of :class:`DiffLineNumber`, one for every place where
            the whole `context` was found inside of a chunk
        :raises ValueError: if `offset` does not point into `context`
        """
        if offset < 0 or offset >= len(context):
            raise ValueError(
                "Only positive values up to the length of the context "
                "minus one are allowed.")

        file_diff = self._get_file_diff(path)
        wanted = list(context)
        span = len(wanted)

        found_at_diff_lines = []
        for chunk in file_diff['chunks']:
            candidates = [_context_line(line) for line in chunk]
            # Every possible start position is compared against the whole
            # context. This also copes with empty chunks and with a partial
            # match which is directly followed by a full one.
            for start in range(len(candidates) - span + 1):
                if candidates[start:start + span] == wanted:
                    found_at_diff_lines.append(
                        _line_to_diff_line_number(chunk[start + offset]))

        return found_at_diff_lines

    def _get_file_diff(self, path):
        """
        Return the parsed diff of the file `path`.

        :raises FileNotInDiffException: if the file is not part of the diff
        """
        for file_diff in self.parsed_diff:
            if file_diff['filename'] == path:
                break
        else:
            raise FileNotInDiffException("File {} not in diff".format(path))
        return file_diff

    def _find_chunk_line_index(self, file_diff, diff_line):
        """
        Return `(chunk, index)` of the first line matching `diff_line`.

        :raises LineNotInDiffException: if no line of the file matches
        """
        for chunk in file_diff['chunks']:
            for idx, line in enumerate(chunk):
                if line['old_lineno'] == diff_line.old:
                    return chunk, idx
                if line['new_lineno'] == diff_line.new:
                    return chunk, idx
        raise LineNotInDiffException(
            "The line {} is not part of the diff.".format(diff_line))
838 846
839 847
def _is_diff_content(line):
    """
    Tell if the parsed `line` is unmodified, added or deleted content.
    """
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions
843 851
844 852
845 853 def _context_line(line):
846 854 return (line['action'], line['line'])
847 855
848 856
849 857 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
850 858
851 859
852 860 def _line_to_diff_line_number(line):
853 861 new_line_no = line['new_lineno'] or None
854 862 old_line_no = line['old_lineno'] or None
855 863 return DiffLineNumber(old=old_line_no, new=new_line_no)
856 864
857 865
class FileNotInDiffException(Exception):
    """
    Raised when a file is requested which is not part of the diff.

    This happens for example if the context of a line is requested for
    a file which the given diff does not contain.
    """
865 873
866 874
class LineNotInDiffException(Exception):
    """
    Raised when a line is requested which is not part of the diff.

    This happens for example if the context of a line is requested and
    the file is part of the given diff, but the line itself is not.
    """
874 882
875 883
class DiffLimitExceeded(Exception):
    """
    Raised while processing a diff which is bigger than a configured limit.
    """
General Comments 0
You need to be logged in to leave comments. Login now