##// END OF EJS Templates
diffs: make validation of version, so we can change diffs and force re-cache if diffs are in old version.
marcink -
r3079:b924aea3 default
parent child Browse files
Show More
@@ -1,1213 +1,1228 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Set of diffing helpers, previously part of vcs
24 24 """
25 25
26 26 import os
27 27 import re
28 28 import bz2
29 29
30 30 import collections
31 31 import difflib
32 32 import logging
33 33 import cPickle as pickle
34 34 from itertools import tee, imap
35 35
36 36 from rhodecode.lib.vcs.exceptions import VCSError
37 37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
38 38 from rhodecode.lib.utils2 import safe_unicode, safe_str
39 39
40 40 log = logging.getLogger(__name__)
41 41
# define max context, a file with more than this numbers of lines is unusable
# in browser anyway
# NOTE(review): `1024 * 1014` looks like a typo for `1024 * 1024` — confirm
# intent before changing; the shipped value has been in use as-is.
MAX_CONTEXT = 1024 * 1014
45 45
46 46
class OPS(object):
    # Single-letter operation codes used to classify each file in a parsed
    # diff ('operation' key of the per-file dicts built by DiffProcessor).
    ADD = 'A'  # file added
    MOD = 'M'  # file modified (also rename/copy/chmod detected as MOD)
    DEL = 'D'  # file deleted
52 52
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return a git-style diff between ``filenode_old`` and ``filenode_new``.

    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; falls back to 3 and is
        clamped to ``MAX_CONTEXT`` to protect against int overflow
    :raises VCSError: when either node is not a ``FileNode``
    """
    # default context, then clamp huge values in one step
    context = min(context or 3, MAX_CONTEXT)

    # submodules cannot be diffed like regular file content
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    # an absent old commit means the file appeared in this diff
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT

    return repo.get_diff(
        old_commit, filenode_new.commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
84 84
# Numeric operation codes used as keys of each file's ``stats['ops']``
# mapping in the DiffProcessor parsers below.
NEW_FILENODE = 1      # file was added
DEL_FILENODE = 2      # file was removed
MOD_FILENODE = 3      # file content was modified
RENAMED_FILENODE = 4  # file was renamed
COPIED_FILENODE = 5   # file was copied
CHMOD_FILENODE = 6    # file mode changed
BIN_FILENODE = 7      # binary file, diff content hidden
92 92
93 93
class LimitedDiffContainer(object):
    """
    Wraps an already-parsed diff that was cut off by a size limit,
    recording the limit and the size reached while still behaving like
    the underlying sequence for indexing and iteration.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        # delegate indexing straight to the wrapped diff
        return self.diff[key]

    def __iter__(self):
        # iterate the wrapped diff transparently
        return iter(self.diff)
108 108
class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    ADD = 'add'            # line added
    DELETE = 'del'         # line removed
    UNMODIFIED = 'unmod'   # unchanged line shown as diff context

    CONTEXT = 'context'        # synthetic rows (hunk markers, op messages)
    OLD_NO_NL = 'old-no-nl'    # "\ No newline at end of file", old side
    NEW_NO_NL = 'new-no-nl'    # "\ No newline at end of file", new side
121 121
122 122
123 123 class DiffProcessor(object):
124 124 """
125 125 Give it a unified or git diff and it returns a list of the files that were
126 126 mentioned in the diff together with a dict of meta information that
127 127 can be used to render it in a HTML template.
128 128
129 129 .. note:: Unicode handling
130 130
131 131 The original diffs are a byte sequence and can contain filenames
132 132 in mixed encodings. This class generally returns `unicode` objects
133 133 since the result is intended for presentation to the user.
134 134
135 135 """
136 136 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
137 137 _newline_marker = re.compile(r'^\\ No newline at end of file')
138 138
139 139 # used for inline highlighter word split
140 140 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
141 141
142 142 # collapse ranges of commits over given number
143 143 _collapse_commits_over = 5
144 144
    def __init__(self, diff, format='gitdiff', diff_limit=None,
                 file_limit=None, show_full_diff=True):
        """
        :param diff: A `Diff` object representing a diff from a vcs backend
        :param format: format of diff passed, `udiff` or `gitdiff`
        :param diff_limit: define the size of diff that is considered "big"
            based on that parameter cut off will be triggered, set to None
            to show full diff
        :param file_limit: per-file size threshold the parsers compare each
            file's raw diff against
        :param show_full_diff: when True, size limits are not enforced
        """
        self._diff = diff
        self._format = format
        self.adds = 0
        self.removes = 0
        # calculate diff size
        self.diff_limit = diff_limit
        self.file_limit = file_limit
        self.show_full_diff = show_full_diff
        self.cur_diff_size = 0
        self.parsed = False
        self.parsed_diff = []

        log.debug('Initialized DiffProcessor with %s mode', format)
        if format == 'gitdiff':
            self.differ = self._highlight_line_difflib
            self._parser = self._parse_gitdiff
        else:
            # NOTE(review): the non-gitdiff branch pairs the `udiff`
            # highlighter with the *new* gitdiff parser — presumably
            # intentional during the parser migration; confirm.
            self.differ = self._highlight_line_udiff
            self._parser = self._new_parse_gitdiff
173 173
    def _copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        # tee() replaces the stored iterator with a fresh one and hands
        # back a copy that is safe to exhaust independently
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy
182 182
183 183 def _escaper(self, string):
184 184 """
185 185 Escaper for diff escapes special chars and checks the diff limit
186 186
187 187 :param string:
188 188 """
189 189 self.cur_diff_size += len(string)
190 190
191 191 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
192 192 raise DiffLimitExceeded('Diff Limit Exceeded')
193 193
194 194 return string \
195 195 .replace('&', '&amp;')\
196 196 .replace('<', '&lt;')\
197 197 .replace('>', '&gt;')
198 198
199 199 def _line_counter(self, l):
200 200 """
201 201 Checks each line and bumps total adds/removes for this diff
202 202
203 203 :param l:
204 204 """
205 205 if l.startswith('+') and not l.startswith('+++'):
206 206 self.adds += 1
207 207 elif l.startswith('-') and not l.startswith('---'):
208 208 self.removes += 1
209 209 return safe_unicode(l)
210 210
    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.

        Splits each line into word tokens, diffs the token sequences with
        difflib and wraps non-equal fragments in ``<del>``/``<ins>``.
        Mutates ``line['line']`` and ``next_['line']`` in place.
        """

        # orient the pair so `old` is always the deleted side
        if line['action'] == Action.DELETE:
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = self._token_re.split(old['line'])
        newwords = self._token_re.split(new['line'])
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                # only mark up fragments that actually changed
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)
239 239
    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Computes the common leading prefix and common trailing suffix of
        the two lines and wraps the differing middle of each in ``<ins>``
        (adds) or ``<del>`` (everything else), mutating both dicts.
        """
        # `start` = length of the shared leading prefix
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # `end` walks backwards with negative indices over the shared suffix
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                # index just past the differing region of this line
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)
269 269
270 270 def _clean_line(self, line, command):
271 271 if command in ['+', '-', ' ']:
272 272 # only modify the line if it's actually a diff thing
273 273 line = line[1:]
274 274 return line
275 275
    def _parse_gitdiff(self, inline_diff=True):
        """
        Parse the backing diff into a list of per-file dicts (filename,
        revisions, chunks, stats, operation) and optionally highlight
        inline changes between paired add/delete lines.

        :param inline_diff: when True, run ``self.differ`` over paired
            changed lines after parsing
        :return: sorted list of file dicts, possibly wrapped in a
            ``LimitedDiffContainer`` when a size limit was hit
        """
        _files = []
        # identity wrapper; swapped for LimitedDiffContainer on limit hit
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header

            # escape lazily while splitting; _escaper also tracks diff size
            diff = imap(self._escaper, self.diff_splitter(chunk.diff))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be copy, rename or chmod

                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))
                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = len(raw_diff) > self.file_limit

                    # changed from _escaper function so we validate size of
                    # each file instead of the whole diff
                    # diff will hide big files but still show small ones
                    # from my tests, big files are fairly safe to be parsed
                    # but the browser is the bottleneck
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    exceeds_limit = len(raw_diff) > self.file_limit
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a synthetic context chunk describing the operations
            # (except plain modification, which needs no banner)
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
            } for _op, msg in stats['ops'].iteritems()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # sort files: adds first, then modifications, then deletions
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        if not inline_diff:
            return diff_container(sorted(_files, key=sorter))

        # highlight inline changes
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in (
                                Action.UNMODIFIED, Action.CONTEXT):
                            nextline = lineiter.next()
                            # only highlight a del/add (or add/del) pair
                            if nextline['action'] in ['unmod', 'context'] or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return diff_container(sorted(_files, key=sorter))
434 434
435 435 def _check_large_diff(self):
436 436 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
437 437 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
438 438 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
439 439
    # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
    def _new_parse_gitdiff(self, inline_diff=True):
        """
        Parse the backing diff into a list of per-file dicts using the
        newer hunk-structured line parser (``_new_parse_lines``).

        :param inline_diff: accepted for interface parity with
            ``_parse_gitdiff``; not used by this implementation
        :return: sorted list of file dicts, possibly wrapped in a
            ``LimitedDiffContainer`` when a size limit was hit
        """
        _files = []

        # this can be overriden later to a LimitedDiffContainer type
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header
            log.debug('parsing diff %r', head)

            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'old_mode': None,
                'new_mode': None,
                'ops': {},
            }
            if head['old_mode']:
                stats['old_mode'] = head['old_mode']
            if head['new_mode']:
                stats['new_mode'] = head['new_mode']
            if head['b_mode']:
                # b_mode wins over new_mode when both are present
                stats['new_mode'] = head['b_mode']

            # delete file
            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # new file
            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['old_mode'] = None
                stats['new_mode'] = head['new_file_mode']
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

            # modify operation, can be copy, rename or chmod
            else:
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))

                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['renamed'] = (head['rename_from'], head['rename_to'])
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['copied'] = (head['copy_from'], head['copy_to'])
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['new_file'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                # simulate splitlines, so we keep the line end part
                diff = self.diff_splitter(chunk.diff)

                # append each file to the diff size
                raw_chunk_size = len(raw_diff)

                exceeds_limit = raw_chunk_size > self.file_limit
                self.cur_diff_size += raw_chunk_size

                try:
                    # Check each file instead of the whole diff.
                    # Diff will hide big files but still show small ones.
                    # From the tests big files are fairly safe to be parsed
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        log.debug('File `%s` exceeds current file_limit of %s',
                                  safe_unicode(head['b_path']), self.file_limit)
                        raise DiffLimitExceeded(
                            'File Limit %s Exceeded', self.file_limit)

                    self._check_large_diff()

                    raw_diff, chunks, _stats = self._new_parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            # Hide content of deleted node by setting empty chunks
            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a synthetic context chunk describing the operations
            # (plain modification needs no banner)
            chunks.insert(
                0, [{'old_lineno': '',
                     'new_lineno': '',
                     'action': Action.CONTEXT,
                     'line': msg,
                     } for _op, msg in stats['ops'].iteritems()
                    if _op not in [MOD_FILENODE]])

            original_filename = safe_unicode(head['a_path'])
            _files.append({
                'original_filename': original_filename,
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # sort files: adds first, then modifications, then deletions
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        return diff_container(sorted(_files, key=sorter))
612 612
    # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
    def _parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.

        :param diff_iter: iterator of escaped diff lines
        :return: tuple of (joined raw diff string, list of chunks where
            each chunk is a list of line dicts, [adds, removes] stats)
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing counts in "@@ -a +b @@" default to 1
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): this `continue` re-tests the same line
                    # without advancing diff_iter — if a '#' or '@' line ever
                    # appears here it would loop forever; presumably such
                    # lines cannot occur inside a hunk — confirm.
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        # booleans add as 0/1 to advance the line counters
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            # exhausting the iterator is the normal termination path
            pass
        return ''.join(raw_diff), chunks, stats
706 706
    # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff and return data for the template.

        :param diff_iter: iterator of diff lines (newline-terminated)
        :return: tuple of (joined raw diff string, list of hunk dicts
            with source/target ranges and line dicts, [adds, removes])
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                # match header e.g @@ -0,0 +1 @@\n'
                match = self._chunk_re.match(line)

                if not match:
                    break

                gr = match.groups()
                # missing counts in "@@ -a +b @@" default to 1
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                line = diff_iter.next()

                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    # NOTE(review): this `continue` re-tests the same line
                    # without advancing diff_iter — see the matching note in
                    # _parse_lines; confirm such lines cannot occur here.
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    if not self._newline_marker.match(line):
                        # booleans add as 0/1 to advance the line counters
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            # exhausting the iterator is the normal termination path
            pass

        return ''.join(raw_diff), chunks, stats
808 808
809 809 def _safe_id(self, idstring):
810 810 """Make a string safe for including in an id attribute.
811 811
812 812 The HTML spec says that id attributes 'must begin with
813 813 a letter ([A-Za-z]) and may be followed by any number
814 814 of letters, digits ([0-9]), hyphens ("-"), underscores
815 815 ("_"), colons (":"), and periods (".")'. These regexps
816 816 are slightly over-zealous, in that they remove colons
817 817 and periods unnecessarily.
818 818
819 819 Whitespace is transformed into underscores, and then
820 820 anything which is not a hyphen or a character that
821 821 matches \w (alphanumerics and underscore) is removed.
822 822
823 823 """
824 824 # Transform all whitespace to underscore
825 825 idstring = re.sub(r'\s', "_", '%s' % idstring)
826 826 # Remove everything that is not a hyphen or a member of \w
827 827 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
828 828 return idstring
829 829
830 830 @classmethod
831 831 def diff_splitter(cls, string):
832 832 """
833 833 Diff split that emulates .splitlines() but works only on \n
834 834 """
835 835 if not string:
836 836 return
837 837 elif string == '\n':
838 838 yield u'\n'
839 839 else:
840 840
841 841 has_newline = string.endswith('\n')
842 842 elements = string.split('\n')
843 843 if has_newline:
844 844 # skip last element as it's empty string from newlines
845 845 elements = elements[:-1]
846 846
847 847 len_elements = len(elements)
848 848
849 849 for cnt, line in enumerate(elements, start=1):
850 850 last_line = cnt == len_elements
851 851 if last_line and not has_newline:
852 852 yield safe_unicode(line)
853 853 else:
854 854 yield safe_unicode(line) + '\n'
855 855
856 856 def prepare(self, inline_diff=True):
857 857 """
858 858 Prepare the passed udiff for HTML rendering.
859 859
860 860 :return: A list of dicts with diff information.
861 861 """
862 862 parsed = self._parser(inline_diff=inline_diff)
863 863 self.parsed = True
864 864 self.parsed_diff = parsed
865 865 return parsed
866 866
    def as_raw(self, diff_lines=None):
        """
        Returns raw diff as a byte string

        :param diff_lines: accepted but ignored by this implementation;
            the backing diff's raw bytes are always returned.
        """
        return self._diff.raw
872 872
    def as_html(self, table_class='code-difftable', line_class='line',
                old_lineno_class='lineno old', new_lineno_class='lineno new',
                code_class='code', enable_comments=False, parsed_lines=None):
        """
        Return given diff as html table with customized css classes

        :param parsed_lines: optional pre-parsed diff data to render
            instead of ``self.parsed_diff``
        :return: HTML string, or None when there was nothing to render
        """
        # TODO(marcink): not sure how to pass in translator
        # here in an efficient way, leave the _ for proper gettext extraction
        _ = lambda s: s

        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s" class="tooltip"
                          title="%(title)s">%(label)s</a>''' % {
                    'title': _('Click to select line'),
                    'url': url,
                    'label': label
                }
            else:
                return label
        if not self.parsed:
            self.prepare()

        diff_lines = self.parsed_diff
        if parsed_lines:
            diff_lines = parsed_lines

        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })

        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    # NOTE(review): the key is named 'oldline_no' but maps to
                    # new_lineno; harmless since the key is local to this
                    # format string, just confusingly named
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new

                    # context rows are not linkable
                    if change['action'] != Action.CONTEXT:
                        anchor_link = True
                    else:
                        anchor_link = False

                    ###########################################################
                    # COMMENT ICONS
                    ###########################################################
                    _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')

                    if enable_comments and change['action'] != Action.CONTEXT:
                        _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')

                    _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')

                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(anchor_link, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    code_classes = [code_class]
                    if (not enable_comments or
                            change['action'] == Action.CONTEXT):
                        code_classes.append('no-comment')
                    _html.append('\t<td class="%s">' % ' '.join(code_classes))
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })

                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)
997 997
def stat(self):
    """
    Return the line statistics of this diff as an
    ``(added, removed)`` tuple.
    """
    return (self.adds, self.removes)
1003 1003
def get_context_of_line(
        self, path, diff_line=None, context_before=3, context_after=3):
    """
    Returns the context lines for the specified diff line.

    :param path: filename of the diff entry to look into
    :param diff_line: a :class:`DiffLineNumber`; exactly one of its
        ``old``/``new`` slots must be set, the other must be ``None``
    :param context_before: number of lines to include before the match
    :param context_after: number of lines to include after the match
    :type diff_line: :class:`DiffLineNumber`
    """
    assert self.parsed, "DiffProcessor is not initialized."

    # If neither slot is None, both an old and a new line number were
    # given, which is ambiguous.
    if None not in diff_line:
        raise ValueError(
            "Cannot specify both line numbers: {}".format(diff_line))

    file_diff = self._get_file_diff(path)
    chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

    # Clamp the window start at the chunk beginning; slicing already
    # handles an end index past the chunk length.
    first_line_to_include = max(idx - context_before, 0)
    first_line_after_context = idx + context_after + 1
    context_lines = chunk[first_line_to_include:first_line_after_context]

    # Keep only real diff content (added/removed/unmodified lines).
    line_contents = [
        _context_line(line) for line in context_lines
        if _is_diff_content(line)]
    # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
    # Once they are fixed, we can drop this line here.
    if line_contents:
        line_contents[-1] = (
            line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
    return line_contents
1033 1033
def find_context(self, path, context, offset=0):
    """
    Finds the given `context` inside of the diff.

    Use the parameter `offset` to specify which offset the target line has
    inside of the given `context`. This way the correct diff line will be
    returned.

    :param path: filename of the diff entry to search
    :param context: sequence of ``(action, line)`` pairs as produced by
        :func:`_context_line`
    :param offset: Shall be used to specify the offset of the main line
        within the given `context`.
    """
    if offset < 0 or offset >= len(context):
        raise ValueError(
            "Only positive values up to the length of the context "
            "minus one are allowed.")

    matches = []
    file_diff = self._get_file_diff(path)

    for chunk in file_diff['chunks']:
        # Naive subsequence search: keep advancing context_iter while the
        # chunk lines match; exhausting it (StopIteration) means the whole
        # context matched just before the current line.
        context_iter = iter(context)
        for line_idx, line in enumerate(chunk):
            try:
                if _context_line(line) == context_iter.next():
                    continue
            except StopIteration:
                matches.append((line_idx, chunk))
            # Mismatch (or a match just recorded): restart matching from
            # the beginning of the context.
            context_iter = iter(context)

        # Increment position and trigger StopIteration
        # if we had a match at the end
        # NOTE(review): assumes every chunk is non-empty, otherwise
        # line_idx would be unbound here -- confirm with the parser.
        line_idx += 1
        try:
            context_iter.next()
        except StopIteration:
            matches.append((line_idx, chunk))

    # A match position points just past the context end; step back by
    # (len(context) - offset) to land on the requested line.
    effective_offset = len(context) - offset
    found_at_diff_lines = [
        _line_to_diff_line_number(chunk[idx - effective_offset])
        for idx, chunk in matches]

    return found_at_diff_lines
1077 1077
def _get_file_diff(self, path):
    """
    Return the parsed per-file diff entry whose ``filename`` equals
    ``path``.

    :raises FileNotInDiffException: if no entry matches ``path``
    """
    for entry in self.parsed_diff:
        if entry['filename'] == path:
            return entry
    raise FileNotInDiffException("File {} not in diff".format(path))
1085 1085
def _find_chunk_line_index(self, file_diff, diff_line):
    """
    Locate ``diff_line`` inside ``file_diff`` and return a
    ``(chunk, index)`` pair for the first line matching either the old
    or the new line number.

    :raises LineNotInDiffException: if no line matches
    """
    for chunk in file_diff['chunks']:
        for position, line in enumerate(chunk):
            matches_old = line['old_lineno'] == diff_line.old
            matches_new = line['new_lineno'] == diff_line.new
            if matches_old or matches_new:
                return chunk, position
    raise LineNotInDiffException(
        "The line {} is not part of the diff.".format(diff_line))
1095 1095
1096 1096
def _is_diff_content(line):
    """
    Tell whether ``line`` carries actual diff content (an unmodified,
    added or deleted line) rather than other markers.
    """
    action = line['action']
    return action in (Action.UNMODIFIED, Action.ADD, Action.DELETE)
1100 1100
1101 1101
def _context_line(line):
    """Project a parsed diff line dict onto its ``(action, text)`` pair."""
    action, text = line['action'], line['line']
    return action, text
1104 1104
1105 1105
1106 1106 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1107 1107
1108 1108
def _line_to_diff_line_number(line):
    """
    Convert a parsed diff line dict into a :class:`DiffLineNumber`,
    normalizing falsy line numbers (``''``, ``0``) to ``None``.
    """
    return DiffLineNumber(
        old=line['old_lineno'] or None,
        new=line['new_lineno'] or None)
1113 1113
1114 1114
class FileNotInDiffException(Exception):
    """
    Raised when the context for a missing file is requested.

    If you request the context for a line in a file which is not part of the
    given diff, then this exception is raised. Raised by
    ``DiffProcessor._get_file_diff``.
    """
1122 1122
1123 1123
class LineNotInDiffException(Exception):
    """
    Raised when the context for a missing line is requested.

    If you request the context for a line in a file and this line is not
    part of the given diff, then this exception is raised. Raised by
    ``DiffProcessor._find_chunk_line_index``.
    """
1131 1131
1132 1132
class DiffLimitExceeded(Exception):
    # NOTE(review): not raised anywhere in this chunk; the name suggests it
    # signals a diff exceeding configured size limits -- confirm at the
    # call sites elsewhere in the module.
    pass
1135 1135
1136 1136
# NOTE(marcink): if diffs.mako change, probably this
# needs a bump to next version
CURRENT_DIFF_VERSION = 'v1'


def _cleanup_cache_file(cached_diff_file):
    """Best-effort removal of a (possibly damaged) cache file."""
    # cleanup file to not store it "damaged"
    try:
        os.remove(cached_diff_file)
    except Exception:
        log.exception('Failed to cleanup path %s', cached_diff_file)


def cache_diff(cached_diff_file, diff, commits):
    """
    Persist ``diff`` and ``commits`` under ``cached_diff_file`` as a
    bz2-compressed pickle, tagged with ``CURRENT_DIFF_VERSION`` so stale
    caches can be detected by :func:`load_cached_diff`.

    Failures are logged (never raised) and a partially written file is
    removed so a damaged cache is not kept around.
    """
    struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': diff,
        'commits': commits
    }

    try:
        with bz2.BZ2File(cached_diff_file, 'wb') as f:
            pickle.dump(struct, f)
        log.debug('Saved diff cache under %s', cached_diff_file)
    except Exception:
        log.warn('Failed to save cache', exc_info=True)
        _cleanup_cache_file(cached_diff_file)


def load_cached_diff(cached_diff_file):
    """
    Load a diff cache written by :func:`cache_diff`.

    Returns the default (empty) structure when the file is missing,
    unreadable, or malformed. A cache written by a different
    ``CURRENT_DIFF_VERSION`` is purged on the spot and the default is
    returned, forcing a re-cache in the new format.
    """
    default_struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': None,
        'commits': None
    }

    has_cache = os.path.isfile(cached_diff_file)
    if not has_cache:
        return default_struct

    data = None
    try:
        # SECURITY NOTE: pickle.load executes arbitrary code if the cache
        # file can be tampered with; safe only while the cache storage
        # directory is trusted/local.
        with bz2.BZ2File(cached_diff_file, 'rb') as f:
            data = pickle.load(f)
        log.debug('Loaded diff cache from %s', cached_diff_file)
    except Exception:
        log.warn('Failed to read diff cache file', exc_info=True)

    if not data:
        data = default_struct

    if not isinstance(data, dict):
        # old version of data ?
        data = default_struct

    # check version
    if data.get('version') != CURRENT_DIFF_VERSION:
        # purge cache
        _cleanup_cache_file(cached_diff_file)
        return default_struct

    return data
1186 1201
1187 1202
def generate_diff_cache_key(*args):
    """
    Helper to generate a cache key using arguments.

    Each argument is stringified and joined with ``_``; slashes are
    replaced so a parameter cannot create subdirectories in the cache
    storage path.
    """
    def arg_mapper(input_param):
        input_param = safe_str(input_param)
        # we cannot allow '/' in arguments since it would allow
        # subdirectory usage
        # BUG FIX: str.replace returns a new string; the original code
        # discarded the result, leaving '/' in the key.
        input_param = input_param.replace('/', '_')
        return input_param or None  # prevent empty string arguments

    key_template = '_'.join(['{}'] * len(args))
    return key_template.format(*map(arg_mapper, args))
1201 1216
1202 1217
def diff_cache_exist(cache_storage, *args):
    """
    Based on all generated arguments check and return a cache path.
    """
    key = generate_diff_cache_key(*args)
    candidate = os.path.join(cache_storage, key)
    # Reject any resolved path escaping cache_storage (e.g. a parameter
    # containing '../../').
    if not os.path.abspath(candidate).startswith(cache_storage):
        raise ValueError('Final path must be within {}'.format(cache_storage))

    return candidate
General Comments 0
You need to be logged in to leave comments. Login now