##// END OF EJS Templates
diffs: python3 port
super-admin -
r5083:875cd526 default
parent child Browse files
Show More
@@ -28,7 +28,9 b' import math'
28 import re
28 import re
29 import sys
29 import sys
30 import time
30 import time
31 import urllib.request, urllib.parse, urllib.error
31 import urllib.request
32 import urllib.parse
33 import urllib.error
32
34
33
35
34 class diff_match_patch:
36 class diff_match_patch:
@@ -168,7 +170,7 b' class diff_match_patch:'
168 diffs = [
170 diffs = [
169 (self.DIFF_INSERT, longtext[:i]),
171 (self.DIFF_INSERT, longtext[:i]),
170 (self.DIFF_EQUAL, shorttext),
172 (self.DIFF_EQUAL, shorttext),
171 (self.DIFF_INSERT, longtext[i + len(shorttext) :]),
173 (self.DIFF_INSERT, longtext[i + len(shorttext):]),
172 ]
174 ]
173 # Swap insertions for deletions if diff is reversed.
175 # Swap insertions for deletions if diff is reversed.
174 if len(text1) > len(text2):
176 if len(text1) > len(text2):
@@ -241,7 +243,7 b' class diff_match_patch:'
241 if count_delete >= 1 and count_insert >= 1:
243 if count_delete >= 1 and count_insert >= 1:
242 # Delete the offending records and add the merged ones.
244 # Delete the offending records and add the merged ones.
243 a = self.diff_main(text_delete, text_insert, False, deadline)
245 a = self.diff_main(text_delete, text_insert, False, deadline)
244 diffs[pointer - count_delete - count_insert : pointer] = a
246 diffs[pointer - count_delete - count_insert: pointer] = a
245 pointer = pointer - count_delete - count_insert + len(a)
247 pointer = pointer - count_delete - count_insert + len(a)
246 count_insert = 0
248 count_insert = 0
247 count_delete = 0
249 count_delete = 0
@@ -1929,7 +1931,7 b' class diff_match_patch:'
1929 return patches
1931 return patches
1930 text = textline.split("\n")
1932 text = textline.split("\n")
1931 while len(text) != 0:
1933 while len(text) != 0:
1932 m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1934 m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1933 if not m:
1935 if not m:
1934 raise ValueError("Invalid patch string: " + text[0])
1936 raise ValueError("Invalid patch string: " + text[0])
1935 patch = patch_obj()
1937 patch = patch_obj()
@@ -22,14 +22,13 b''
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25 import dataclasses
26 import os
26 import os
27 import re
27 import re
28 import bz2
28 import bz2
29 import gzip
29 import gzip
30 import time
30 import time
31
31
32 import collections
33 import difflib
32 import difflib
34 import logging
33 import logging
35 import pickle
34 import pickle
@@ -37,7 +36,8 b' from itertools import tee'
37
36
38 from rhodecode.lib.vcs.exceptions import VCSError
37 from rhodecode.lib.vcs.exceptions import VCSError
39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
38 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
39 from rhodecode.lib.vcs.backends import base
40 from rhodecode.lib.str_utils import safe_str
41
41
42 log = logging.getLogger(__name__)
42 log = logging.getLogger(__name__)
43
43
@@ -55,10 +55,21 b' def get_diff_whitespace_flag(request):'
55 return request.GET.get('ignorews', '') == '1'
55 return request.GET.get('ignorews', '') == '1'
56
56
57
57
58 class OPS(object):
58 @dataclasses.dataclass
59 ADD = 'A'
59 class OPS:
60 MOD = 'M'
60 ADD: str = 'A'
61 DEL = 'D'
61 MOD: str = 'M'
62 DEL: str = 'D'
63
64
65 @dataclasses.dataclass
66 class DiffLineNumber:
67 old: int | None
68 new: int | None
69
70 def __iter__(self):
71 yield self.old
72 yield self.new
62
73
63
74
64 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
75 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
@@ -79,9 +90,7 b' def get_gitdiff(filenode_old, filenode_n'
79
90
80 for filenode in (filenode_old, filenode_new):
91 for filenode in (filenode_old, filenode_new):
81 if not isinstance(filenode, FileNode):
92 if not isinstance(filenode, FileNode):
82 raise VCSError(
93 raise VCSError(f"Given object should be FileNode object, not {filenode.__class__}")
83 "Given object should be FileNode object, not %s"
84 % filenode.__class__)
85
94
86 repo = filenode_new.commit.repository
95 repo = filenode_new.commit.repository
87 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
96 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
@@ -103,7 +112,7 b' BIN_FILENODE = 7'
103
112
104 class LimitedDiffContainer(object):
113 class LimitedDiffContainer(object):
105
114
106 def __init__(self, diff_limit, cur_diff_size, diff):
115 def __init__(self, diff_limit: int, cur_diff_size, diff):
107 self.diff = diff
116 self.diff = diff
108 self.diff_limit = diff_limit
117 self.diff_limit = diff_limit
109 self.cur_diff_size = cur_diff_size
118 self.cur_diff_size = cur_diff_size
@@ -132,9 +141,9 b' class Action(object):'
132
141
133 class DiffProcessor(object):
142 class DiffProcessor(object):
134 """
143 """
135 Give it a unified or git diff and it returns a list of the files that were
144 Give it a unified or git diff, and it returns a list of the files that were
136 mentioned in the diff together with a dict of meta information that
145 mentioned in the diff together with a dict of meta information that
137 can be used to render it in a HTML template.
146 can be used to render it in an HTML template.
138
147
139 .. note:: Unicode handling
148 .. note:: Unicode handling
140
149
@@ -143,26 +152,26 b' class DiffProcessor(object):'
143 since the result is intended for presentation to the user.
152 since the result is intended for presentation to the user.
144
153
145 """
154 """
146 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
155 _chunk_re = re.compile(br'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
147 _newline_marker = re.compile(r'^\\ No newline at end of file')
156 _newline_marker = re.compile(br'^\\ No newline at end of file')
148
157
149 # used for inline highlighter word split
158 # used for inline highlighter word split
150 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
159 _token_re = re.compile(br'()(&gt;|&lt;|&amp;|\W+?)')
151
160
152 # collapse ranges of commits over given number
161 # collapse ranges of commits over given number
153 _collapse_commits_over = 5
162 _collapse_commits_over = 5
154
163
155 def __init__(self, diff, format='gitdiff', diff_limit=None,
164 def __init__(self, diff: base.Diff, diff_format='gitdiff', diff_limit: int = 0,
156 file_limit=None, show_full_diff=True):
165 file_limit: int = 0, show_full_diff=True):
157 """
166 """
158 :param diff: A `Diff` object representing a diff from a vcs backend
167 :param diff: A `Diff` object representing a diff from a vcs backend
159 :param format: format of diff passed, `udiff` or `gitdiff`
168 :param diff_format: format of diff passed, `udiff` or `gitdiff`
160 :param diff_limit: define the size of diff that is considered "big"
169 :param diff_limit: define the size of diff that is considered "big"
161 based on that parameter cut off will be triggered, set to None
170 based on that parameter cut off will be triggered, set to None
162 to show full diff
171 to show full diff
163 """
172 """
164 self._diff = diff
173 self._diff = diff
165 self._format = format
174 self._format = diff_format
166 self.adds = 0
175 self.adds = 0
167 self.removes = 0
176 self.removes = 0
168 # calculate diff size
177 # calculate diff size
@@ -173,13 +182,14 b' class DiffProcessor(object):'
173 self.parsed = False
182 self.parsed = False
174 self.parsed_diff = []
183 self.parsed_diff = []
175
184
176 log.debug('Initialized DiffProcessor with %s mode', format)
185 log.debug('Initialized DiffProcessor with %s mode', diff_format)
177 if format == 'gitdiff':
186 self.differ = self._highlight_line_udiff
187 self._parser = self._new_parse_gitdiff
188
189 if diff_format == 'gitdiff':
178 self.differ = self._highlight_line_difflib
190 self.differ = self._highlight_line_difflib
179 self._parser = self._parse_gitdiff
191 self._parser = self._parse_gitdiff
180 else:
192 raise DeprecationWarning('gitdiff usage is deprecated')
181 self.differ = self._highlight_line_udiff
182 self._parser = self._new_parse_gitdiff
183
193
184 def _copy_iterator(self):
194 def _copy_iterator(self):
185 """
195 """
@@ -190,33 +200,33 b' class DiffProcessor(object):'
190 self.__udiff, iterator_copy = tee(self.__udiff)
200 self.__udiff, iterator_copy = tee(self.__udiff)
191 return iterator_copy
201 return iterator_copy
192
202
193 def _escaper(self, string):
203 def _escaper(self, diff_string):
194 """
204 """
195 Escaper for diff escapes special chars and checks the diff limit
205 Escaper for diff escapes special chars and checks the diff limit
196
206
197 :param string:
207 :param string:
198 """
208 """
199 self.cur_diff_size += len(string)
209 self.cur_diff_size += len(diff_string)
200
210
201 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
211 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
202 raise DiffLimitExceeded('Diff Limit Exceeded')
212 raise DiffLimitExceeded('Diff Limit Exceeded')
203
213
204 return string \
214 return diff_string \
205 .replace('&', '&amp;')\
215 .replace(b'&', b'&amp;')\
206 .replace('<', '&lt;')\
216 .replace(b'<', b'&lt;')\
207 .replace('>', '&gt;')
217 .replace(b'>', b'&gt;')
208
218
209 def _line_counter(self, l):
219 def _line_counter(self, diff_line):
210 """
220 """
211 Checks each line and bumps total adds/removes for this diff
221 Checks each line and bumps total adds/removes for this diff
212
222
213 :param l:
223 :param diff_line:
214 """
224 """
215 if l.startswith('+') and not l.startswith('+++'):
225 if diff_line.startswith(b'+') and not diff_line.startswith(b'+++'):
216 self.adds += 1
226 self.adds += 1
217 elif l.startswith('-') and not l.startswith('---'):
227 elif diff_line.startswith(b'-') and not diff_line.startswith(b'---'):
218 self.removes += 1
228 self.removes += 1
219 return safe_unicode(l)
229 return diff_line
220
230
221 def _highlight_line_difflib(self, line, next_):
231 def _highlight_line_difflib(self, line, next_):
222 """
232 """
@@ -238,9 +248,9 b' class DiffProcessor(object):'
238 newfrag = ''.join(newwords[j1:j2])
248 newfrag = ''.join(newwords[j1:j2])
239 if tag != 'equal':
249 if tag != 'equal':
240 if oldfrag:
250 if oldfrag:
241 oldfrag = '<del>%s</del>' % oldfrag
251 oldfrag = f'<del>{oldfrag}</del>'
242 if newfrag:
252 if newfrag:
243 newfrag = '<ins>%s</ins>' % newfrag
253 newfrag = f'<ins>{newfrag}</ins>'
244 oldfragments.append(oldfrag)
254 oldfragments.append(oldfrag)
245 newfragments.append(newfrag)
255 newfragments.append(newfrag)
246
256
@@ -267,17 +277,11 b' class DiffProcessor(object):'
267 tag = 'ins'
277 tag = 'ins'
268 else:
278 else:
269 tag = 'del'
279 tag = 'del'
270 l['line'] = '%s<%s>%s</%s>%s' % (
280 l['line'] = f"{l['line'][:start]}<{tag}>{l['line'][start:last]}</{tag}>{l['line'][last:]}"
271 l['line'][:start],
272 tag,
273 l['line'][start:last],
274 tag,
275 l['line'][last:]
276 )
277 do(line)
281 do(line)
278 do(next_)
282 do(next_)
279
283
280 def _clean_line(self, line, command):
284 def _clean_line(self, line, command: str):
281 if command in ['+', '-', ' ']:
285 if command in ['+', '-', ' ']:
282 # only modify the line if it's actually a diff thing
286 # only modify the line if it's actually a diff thing
283 line = line[1:]
287 line = line[1:]
@@ -285,7 +289,9 b' class DiffProcessor(object):'
285
289
286 def _parse_gitdiff(self, inline_diff=True):
290 def _parse_gitdiff(self, inline_diff=True):
287 _files = []
291 _files = []
288 diff_container = lambda arg: arg
292
293 def diff_container(arg):
294 return arg
289
295
290 for chunk in self._diff.chunks():
296 for chunk in self._diff.chunks():
291 head = chunk.header
297 head = chunk.header
@@ -311,30 +317,24 b' class DiffProcessor(object):'
311 elif head['new_file_mode']:
317 elif head['new_file_mode']:
312 op = OPS.ADD
318 op = OPS.ADD
313 stats['binary'] = True
319 stats['binary'] = True
314 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
320 stats['ops'][NEW_FILENODE] = f"new file {safe_str(head['new_file_mode'])}"
315 else: # modify operation, can be copy, rename or chmod
321 else: # modify operation, can be: copy, rename or chmod
316
322
317 # CHMOD
323 # CHMOD
318 if head['new_mode'] and head['old_mode']:
324 if head['new_mode'] and head['old_mode']:
319 op = OPS.MOD
325 op = OPS.MOD
320 stats['binary'] = True
326 stats['binary'] = True
321 stats['ops'][CHMOD_FILENODE] = (
327 stats['ops'][CHMOD_FILENODE] = f"modified file chmod {safe_str(head['old_mode'])} => {safe_str(head['new_mode'])}"
322 'modified file chmod %s => %s' % (
323 head['old_mode'], head['new_mode']))
324 # RENAME
328 # RENAME
325 if head['rename_from'] != head['rename_to']:
329 if head['rename_from'] != head['rename_to']:
326 op = OPS.MOD
330 op = OPS.MOD
327 stats['binary'] = True
331 stats['binary'] = True
328 stats['ops'][RENAMED_FILENODE] = (
332 stats['ops'][RENAMED_FILENODE] = f"file renamed from {safe_str(head['rename_from'])} to {safe_str(head['rename_to'])}"
329 'file renamed from %s to %s' % (
330 head['rename_from'], head['rename_to']))
331 # COPY
333 # COPY
332 if head.get('copy_from') and head.get('copy_to'):
334 if head.get('copy_from') and head.get('copy_to'):
333 op = OPS.MOD
335 op = OPS.MOD
334 stats['binary'] = True
336 stats['binary'] = True
335 stats['ops'][COPIED_FILENODE] = (
337 stats['ops'][COPIED_FILENODE] = f"file copied from {safe_str(head['copy_from'])} to {safe_str(head['copy_to'])}"
336 'file copied from %s to %s' % (
337 head['copy_from'], head['copy_to']))
338
338
339 # If our new parsed headers didn't match anything fallback to
339 # If our new parsed headers didn't match anything fallback to
340 # old style detection
340 # old style detection
@@ -376,9 +376,8 b' class DiffProcessor(object):'
376 raise DiffLimitExceeded('File Limit Exceeded')
376 raise DiffLimitExceeded('File Limit Exceeded')
377
377
378 except DiffLimitExceeded:
378 except DiffLimitExceeded:
379 diff_container = lambda _diff: \
379 def diff_container(_diff):
380 LimitedDiffContainer(
380 return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff)
381 self.diff_limit, self.cur_diff_size, _diff)
382
381
383 exceeds_limit = len(raw_diff) > self.file_limit
382 exceeds_limit = len(raw_diff) > self.file_limit
384 limited_diff = True
383 limited_diff = True
@@ -387,7 +386,7 b' class DiffProcessor(object):'
387 else: # GIT format binary patch, or possibly empty diff
386 else: # GIT format binary patch, or possibly empty diff
388 if head['bin_patch']:
387 if head['bin_patch']:
389 # we have operation already extracted, but we mark simply
388 # we have operation already extracted, but we mark simply
390 # it's a diff we wont show for binary files
389 # it's a diff we won't show for binary files
391 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
390 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
392 chunks = []
391 chunks = []
393
392
@@ -397,31 +396,33 b' class DiffProcessor(object):'
397 # to see the content of the file
396 # to see the content of the file
398 chunks = []
397 chunks = []
399
398
400 chunks.insert(0, [{
399 frag = [{
401 'old_lineno': '',
400 'old_lineno': '',
402 'new_lineno': '',
401 'new_lineno': '',
403 'action': Action.CONTEXT,
402 'action': Action.CONTEXT,
404 'line': msg,
403 'line': msg,
405 } for _op, msg in stats['ops'].items()
404 } for _op, msg in list(stats['ops'].items())
406 if _op not in [MOD_FILENODE]])
405 if _op not in [MOD_FILENODE]]
406
407 chunks.insert(0, frag)
407
408
408 _files.append({
409 _files.append({
409 'filename': safe_unicode(head['b_path']),
410 'filename': safe_str(head['b_path']),
410 'old_revision': head['a_blob_id'],
411 'old_revision': head['a_blob_id'],
411 'new_revision': head['b_blob_id'],
412 'new_revision': head['b_blob_id'],
412 'chunks': chunks,
413 'chunks': chunks,
413 'raw_diff': safe_unicode(raw_diff),
414 'raw_diff': safe_str(raw_diff),
414 'operation': op,
415 'operation': op,
415 'stats': stats,
416 'stats': stats,
416 'exceeds_limit': exceeds_limit,
417 'exceeds_limit': exceeds_limit,
417 'is_limited_diff': limited_diff,
418 'is_limited_diff': limited_diff,
418 })
419 })
419
420
420 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
421 def operation_sorter(info):
421 OPS.DEL: 2}.get(info['operation'])
422 return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation'])
422
423
423 if not inline_diff:
424 if not inline_diff:
424 return diff_container(sorted(_files, key=sorter))
425 return diff_container(sorted(_files, key=operation_sorter))
425
426
426 # highlight inline changes
427 # highlight inline changes
427 for diff_data in _files:
428 for diff_data in _files:
@@ -440,24 +441,25 b' class DiffProcessor(object):'
440 except StopIteration:
441 except StopIteration:
441 pass
442 pass
442
443
443 return diff_container(sorted(_files, key=sorter))
444 return diff_container(sorted(_files, key=operation_sorter))
444
445
445 def _check_large_diff(self):
446 def _check_large_diff(self):
446 if self.diff_limit:
447 if self.diff_limit:
447 log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)
448 log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)
448 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
449 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
449 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
450 raise DiffLimitExceeded(f'Diff Limit `{self.diff_limit}` Exceeded')
450
451
451 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
452 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
452 def _new_parse_gitdiff(self, inline_diff=True):
453 def _new_parse_gitdiff(self, inline_diff=True):
453 _files = []
454 _files = []
454
455
455 # this can be overriden later to a LimitedDiffContainer type
456 # this can be overridden later to a LimitedDiffContainer type
456 diff_container = lambda arg: arg
457 def diff_container(arg):
458 return arg
457
459
458 for chunk in self._diff.chunks():
460 for chunk in self._diff.chunks():
459 head = chunk.header
461 head = chunk.header_as_str
460 log.debug('parsing diff %r', head)
462 log.debug('parsing diff chunk %r', chunk)
461
463
462 raw_diff = chunk.raw
464 raw_diff = chunk.raw
463 limited_diff = False
465 limited_diff = False
@@ -468,8 +470,8 b' class DiffProcessor(object):'
468 'added': 0,
470 'added': 0,
469 'deleted': 0,
471 'deleted': 0,
470 'binary': False,
472 'binary': False,
471 'old_mode': None,
473 'old_mode': '',
472 'new_mode': None,
474 'new_mode': '',
473 'ops': {},
475 'ops': {},
474 }
476 }
475 if head['old_mode']:
477 if head['old_mode']:
@@ -489,36 +491,30 b' class DiffProcessor(object):'
489 elif head['new_file_mode']:
491 elif head['new_file_mode']:
490 op = OPS.ADD
492 op = OPS.ADD
491 stats['binary'] = True
493 stats['binary'] = True
492 stats['old_mode'] = None
494 stats['old_mode'] = ''
493 stats['new_mode'] = head['new_file_mode']
495 stats['new_mode'] = head['new_file_mode']
494 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
496 stats['ops'][NEW_FILENODE] = f"new file {head['new_file_mode']}"
495
497
496 # modify operation, can be copy, rename or chmod
498 # modify operation, can be: copy, rename or chmod
497 else:
499 else:
498 # CHMOD
500 # CHMOD
499 if head['new_mode'] and head['old_mode']:
501 if head['new_mode'] and head['old_mode']:
500 op = OPS.MOD
502 op = OPS.MOD
501 stats['binary'] = True
503 stats['binary'] = True
502 stats['ops'][CHMOD_FILENODE] = (
504 stats['ops'][CHMOD_FILENODE] = f"modified file chmod {head['old_mode']} => {head['new_mode']}"
503 'modified file chmod %s => %s' % (
504 head['old_mode'], head['new_mode']))
505
505
506 # RENAME
506 # RENAME
507 if head['rename_from'] != head['rename_to']:
507 if head['rename_from'] != head['rename_to']:
508 op = OPS.MOD
508 op = OPS.MOD
509 stats['binary'] = True
509 stats['binary'] = True
510 stats['renamed'] = (head['rename_from'], head['rename_to'])
510 stats['renamed'] = (head['rename_from'], head['rename_to'])
511 stats['ops'][RENAMED_FILENODE] = (
511 stats['ops'][RENAMED_FILENODE] = f"file renamed from {head['rename_from']} to {head['rename_to']}"
512 'file renamed from %s to %s' % (
513 head['rename_from'], head['rename_to']))
514 # COPY
512 # COPY
515 if head.get('copy_from') and head.get('copy_to'):
513 if head.get('copy_from') and head.get('copy_to'):
516 op = OPS.MOD
514 op = OPS.MOD
517 stats['binary'] = True
515 stats['binary'] = True
518 stats['copied'] = (head['copy_from'], head['copy_to'])
516 stats['copied'] = (head['copy_from'], head['copy_to'])
519 stats['ops'][COPIED_FILENODE] = (
517 stats['ops'][COPIED_FILENODE] = f"file copied from {head['copy_from']} to {head['copy_to']}"
520 'file copied from %s to %s' % (
521 head['copy_from'], head['copy_to']))
522
518
523 # If our new parsed headers didn't match anything fallback to
519 # If our new parsed headers didn't match anything fallback to
524 # old style detection
520 # old style detection
@@ -558,9 +554,8 b' class DiffProcessor(object):'
558 # but the browser is the bottleneck.
554 # but the browser is the bottleneck.
559 if not self.show_full_diff and exceeds_limit:
555 if not self.show_full_diff and exceeds_limit:
560 log.debug('File `%s` exceeds current file_limit of %s',
556 log.debug('File `%s` exceeds current file_limit of %s',
561 safe_unicode(head['b_path']), self.file_limit)
557 head['b_path'], self.file_limit)
562 raise DiffLimitExceeded(
558 raise DiffLimitExceeded(f'File Limit {self.file_limit} Exceeded')
563 'File Limit %s Exceeded', self.file_limit)
564
559
565 self._check_large_diff()
560 self._check_large_diff()
566
561
@@ -573,9 +568,11 b' class DiffProcessor(object):'
573 stats['ops'][MOD_FILENODE] = 'modified file'
568 stats['ops'][MOD_FILENODE] = 'modified file'
574
569
575 except DiffLimitExceeded:
570 except DiffLimitExceeded:
576 diff_container = lambda _diff: \
571 def limited_diff_container(_diff):
577 LimitedDiffContainer(
572 return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff)
578 self.diff_limit, self.cur_diff_size, _diff)
573
574 # re-definition of our container wrapper
575 diff_container = limited_diff_container
579
576
580 limited_diff = True
577 limited_diff = True
581 chunks = []
578 chunks = []
@@ -583,7 +580,7 b' class DiffProcessor(object):'
583 else: # GIT format binary patch, or possibly empty diff
580 else: # GIT format binary patch, or possibly empty diff
584 if head['bin_patch']:
581 if head['bin_patch']:
585 # we have operation already extracted, but we mark simply
582 # we have operation already extracted, but we mark simply
586 # it's a diff we wont show for binary files
583 # it's a diff we won't show for binary files
587 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
584 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
588 chunks = []
585 chunks = []
589
586
@@ -594,31 +591,32 b' class DiffProcessor(object):'
594 # to see the content of the file
591 # to see the content of the file
595 chunks = []
592 chunks = []
596
593
597 chunks.insert(
594 frag = [
598 0, [{'old_lineno': '',
595 {'old_lineno': '',
599 'new_lineno': '',
596 'new_lineno': '',
600 'action': Action.CONTEXT,
597 'action': Action.CONTEXT,
601 'line': msg,
598 'line': msg,
602 } for _op, msg in stats['ops'].items()
599 } for _op, msg in list(stats['ops'].items())
603 if _op not in [MOD_FILENODE]])
600 if _op not in [MOD_FILENODE]]
604
601
605 original_filename = safe_unicode(head['a_path'])
602 chunks.insert(0, frag)
603
604 original_filename = safe_str(head['a_path'])
606 _files.append({
605 _files.append({
607 'original_filename': original_filename,
606 'original_filename': original_filename,
608 'filename': safe_unicode(head['b_path']),
607 'filename': safe_str(head['b_path']),
609 'old_revision': head['a_blob_id'],
608 'old_revision': head['a_blob_id'],
610 'new_revision': head['b_blob_id'],
609 'new_revision': head['b_blob_id'],
611 'chunks': chunks,
610 'chunks': chunks,
612 'raw_diff': safe_unicode(raw_diff),
611 'raw_diff': safe_str(raw_diff),
613 'operation': op,
612 'operation': op,
614 'stats': stats,
613 'stats': stats,
615 'exceeds_limit': exceeds_limit,
614 'exceeds_limit': exceeds_limit,
616 'is_limited_diff': limited_diff,
615 'is_limited_diff': limited_diff,
617 })
616 })
618
617
619 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
618 def sorter(info):
620 OPS.DEL: 2}.get(info['operation'])
619 return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation'])
621
622 return diff_container(sorted(_files, key=sorter))
620 return diff_container(sorted(_files, key=sorter))
623
621
624 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
622 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
@@ -667,20 +665,20 b' class DiffProcessor(object):'
667 line = next(diff_iter)
665 line = next(diff_iter)
668
666
669 while old_line < old_end or new_line < new_end:
667 while old_line < old_end or new_line < new_end:
670 command = ' '
668 command = b' '
671 if line:
669 if line:
672 command = line[0]
670 command = line[0]
673
671
674 affects_old = affects_new = False
672 affects_old = affects_new = False
675
673
676 # ignore those if we don't expect them
674 # ignore those if we don't expect them
677 if command in '#@':
675 if command in b'#@':
678 continue
676 continue
679 elif command == '+':
677 elif command == b'+':
680 affects_new = True
678 affects_new = True
681 action = Action.ADD
679 action = Action.ADD
682 stats[0] += 1
680 stats[0] += 1
683 elif command == '-':
681 elif command == b'-':
684 affects_old = True
682 affects_old = True
685 action = Action.DELETE
683 action = Action.DELETE
686 stats[1] += 1
684 stats[1] += 1
@@ -692,8 +690,8 b' class DiffProcessor(object):'
692 old_line += affects_old
690 old_line += affects_old
693 new_line += affects_new
691 new_line += affects_new
694 lines.append({
692 lines.append({
695 'old_lineno': affects_old and old_line or '',
693 'old_lineno': affects_old and old_line or b'',
696 'new_lineno': affects_new and new_line or '',
694 'new_lineno': affects_new and new_line or b'',
697 'action': action,
695 'action': action,
698 'line': self._clean_line(line, command)
696 'line': self._clean_line(line, command)
699 })
697 })
@@ -727,6 +725,7 b' class DiffProcessor(object):'
727
725
728 try:
726 try:
729 line = next(diff_iter)
727 line = next(diff_iter)
728 assert isinstance(line, bytes)
730
729
731 while line:
730 while line:
732 raw_diff.append(line)
731 raw_diff.append(line)
@@ -737,6 +736,7 b' class DiffProcessor(object):'
737 break
736 break
738
737
739 gr = match.groups()
738 gr = match.groups()
739
740 (old_line, old_end,
740 (old_line, old_end,
741 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
741 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
742
742
@@ -754,7 +754,7 b' class DiffProcessor(object):'
754 old_line -= 1
754 old_line -= 1
755 new_line -= 1
755 new_line -= 1
756
756
757 context = len(gr) == 5
757 len(gr) == 5
758 old_end += old_line
758 old_end += old_line
759 new_end += new_line
759 new_end += new_line
760
760
@@ -763,7 +763,8 b' class DiffProcessor(object):'
763 while old_line < old_end or new_line < new_end:
763 while old_line < old_end or new_line < new_end:
764 command = ' '
764 command = ' '
765 if line:
765 if line:
766 command = line[0]
766 # This is bytes, so we need to convert it to a str
767 command: str = chr(line[0])
767
768
768 affects_old = affects_new = False
769 affects_old = affects_new = False
769
770
@@ -786,8 +787,8 b' class DiffProcessor(object):'
786 old_line += affects_old
787 old_line += affects_old
787 new_line += affects_new
788 new_line += affects_new
788 lines.append({
789 lines.append({
789 'old_lineno': affects_old and old_line or '',
790 'old_lineno': affects_old and old_line or None,
790 'new_lineno': affects_new and new_line or '',
791 'new_lineno': affects_new and new_line or None,
791 'action': action,
792 'action': action,
792 'line': self._clean_line(line, command)
793 'line': self._clean_line(line, command)
793 })
794 })
@@ -815,7 +816,7 b' class DiffProcessor(object):'
815 except StopIteration:
816 except StopIteration:
816 pass
817 pass
817
818
818 return ''.join(raw_diff), chunks, stats
819 return b''.join(raw_diff), chunks, stats
819
820
820 def _safe_id(self, idstring):
821 def _safe_id(self, idstring):
821 """Make a string safe for including in an id attribute.
822 """Make a string safe for including in an id attribute.
@@ -833,24 +834,24 b' class DiffProcessor(object):'
833
834
834 """
835 """
835 # Transform all whitespace to underscore
836 # Transform all whitespace to underscore
836 idstring = re.sub(r'\s', "_", '%s' % idstring)
837 idstring = re.sub(r'\s', "_", f'{idstring}')
837 # Remove everything that is not a hyphen or a member of \w
838 # Remove everything that is not a hyphen or a member of \w
838 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
839 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
839 return idstring
840 return idstring
840
841
841 @classmethod
842 @classmethod
842 def diff_splitter(cls, string):
843 def diff_splitter(cls, diff_string: bytes):
843 """
844 """
844 Diff split that emulates .splitlines() but works only on \n
845 Diff split that emulates .splitlines() but works only on \n
845 """
846 """
846 if not string:
847 if not diff_string:
847 return
848 return
848 elif string == '\n':
849 elif diff_string == b'\n':
849 yield '\n'
850 yield b'\n'
850 else:
851 else:
851
852
852 has_newline = string.endswith('\n')
853 has_newline = diff_string.endswith(b'\n')
853 elements = string.split('\n')
854 elements = diff_string.split(b'\n')
854 if has_newline:
855 if has_newline:
855 # skip last element as it's empty string from newlines
856 # skip last element as it's empty string from newlines
856 elements = elements[:-1]
857 elements = elements[:-1]
@@ -860,9 +861,9 b' class DiffProcessor(object):'
860 for cnt, line in enumerate(elements, start=1):
861 for cnt, line in enumerate(elements, start=1):
861 last_line = cnt == len_elements
862 last_line = cnt == len_elements
862 if last_line and not has_newline:
863 if last_line and not has_newline:
863 yield safe_unicode(line)
864 yield line
864 else:
865 else:
865 yield safe_unicode(line) + '\n'
866 yield line + b'\n'
866
867
867 def prepare(self, inline_diff=True):
868 def prepare(self, inline_diff=True):
868 """
869 """
@@ -879,132 +880,7 b' class DiffProcessor(object):'
879 """
880 """
880 Returns raw diff as a byte string
881 Returns raw diff as a byte string
881 """
882 """
882 return self._diff.raw
883 return self._diff.raw.tobytes()
883
884 def as_html(self, table_class='code-difftable', line_class='line',
885 old_lineno_class='lineno old', new_lineno_class='lineno new',
886 code_class='code', enable_comments=False, parsed_lines=None):
887 """
888 Return given diff as html table with customized css classes
889 """
890 # TODO(marcink): not sure how to pass in translator
891 # here in an efficient way, leave the _ for proper gettext extraction
892 _ = lambda s: s
893
894 def _link_to_if(condition, label, url):
895 """
896 Generates a link if condition is meet or just the label if not.
897 """
898
899 if condition:
900 return '''<a href="%(url)s" class="tooltip"
901 title="%(title)s">%(label)s</a>''' % {
902 'title': _('Click to select line'),
903 'url': url,
904 'label': label
905 }
906 else:
907 return label
908 if not self.parsed:
909 self.prepare()
910
911 diff_lines = self.parsed_diff
912 if parsed_lines:
913 diff_lines = parsed_lines
914
915 _html_empty = True
916 _html = []
917 _html.append('''<table class="%(table_class)s">\n''' % {
918 'table_class': table_class
919 })
920
921 for diff in diff_lines:
922 for line in diff['chunks']:
923 _html_empty = False
924 for change in line:
925 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
926 'lc': line_class,
927 'action': change['action']
928 })
929 anchor_old_id = ''
930 anchor_new_id = ''
931 anchor_old = "%(filename)s_o%(oldline_no)s" % {
932 'filename': self._safe_id(diff['filename']),
933 'oldline_no': change['old_lineno']
934 }
935 anchor_new = "%(filename)s_n%(oldline_no)s" % {
936 'filename': self._safe_id(diff['filename']),
937 'oldline_no': change['new_lineno']
938 }
939 cond_old = (change['old_lineno'] != '...' and
940 change['old_lineno'])
941 cond_new = (change['new_lineno'] != '...' and
942 change['new_lineno'])
943 if cond_old:
944 anchor_old_id = 'id="%s"' % anchor_old
945 if cond_new:
946 anchor_new_id = 'id="%s"' % anchor_new
947
948 if change['action'] != Action.CONTEXT:
949 anchor_link = True
950 else:
951 anchor_link = False
952
953 ###########################################################
954 # COMMENT ICONS
955 ###########################################################
956 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
957
958 if enable_comments and change['action'] != Action.CONTEXT:
959 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
960
961 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
962
963 ###########################################################
964 # OLD LINE NUMBER
965 ###########################################################
966 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
967 'a_id': anchor_old_id,
968 'olc': old_lineno_class
969 })
970
971 _html.append('''%(link)s''' % {
972 'link': _link_to_if(anchor_link, change['old_lineno'],
973 '#%s' % anchor_old)
974 })
975 _html.append('''</td>\n''')
976 ###########################################################
977 # NEW LINE NUMBER
978 ###########################################################
979
980 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
981 'a_id': anchor_new_id,
982 'nlc': new_lineno_class
983 })
984
985 _html.append('''%(link)s''' % {
986 'link': _link_to_if(anchor_link, change['new_lineno'],
987 '#%s' % anchor_new)
988 })
989 _html.append('''</td>\n''')
990 ###########################################################
991 # CODE
992 ###########################################################
993 code_classes = [code_class]
994 if (not enable_comments or
995 change['action'] == Action.CONTEXT):
996 code_classes.append('no-comment')
997 _html.append('\t<td class="%s">' % ' '.join(code_classes))
998 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
999 'code': change['line']
1000 })
1001
1002 _html.append('''\t</td>''')
1003 _html.append('''\n</tr>\n''')
1004 _html.append('''</table>''')
1005 if _html_empty:
1006 return None
1007 return ''.join(_html)
1008
884
1009 def stat(self):
885 def stat(self):
1010 """
886 """
@@ -1013,33 +889,32 b' class DiffProcessor(object):'
1013 return self.adds, self.removes
889 return self.adds, self.removes
1014
890
1015 def get_context_of_line(
891 def get_context_of_line(
1016 self, path, diff_line=None, context_before=3, context_after=3):
892 self, path, diff_line: DiffLineNumber = None, context_before: int = 3, context_after: int = 3):
1017 """
893 """
1018 Returns the context lines for the specified diff line.
894 Returns the context lines for the specified diff line.
1019
1020 :type diff_line: :class:`DiffLineNumber`
1021 """
895 """
1022 assert self.parsed, "DiffProcessor is not initialized."
896 assert self.parsed, "DiffProcessor is not initialized."
1023
897
1024 if None not in diff_line:
898 if None not in diff_line:
1025 raise ValueError(
899 raise ValueError(f"Cannot specify both line numbers in diff_line: {diff_line}")
1026 "Cannot specify both line numbers: {}".format(diff_line))
1027
900
1028 file_diff = self._get_file_diff(path)
901 file_diff = self._get_file_diff(path)
1029 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
902 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1030
903
1031 first_line_to_include = max(idx - context_before, 0)
904 first_line_to_include = max(idx - context_before, 0)
1032 first_line_after_context = idx + context_after + 1
905 first_line_after_context = idx + context_after + 1
1033 context_lines = chunk[first_line_to_include:first_line_after_context]
906 context_lines = chunk['lines'][first_line_to_include:first_line_after_context]
1034
907
1035 line_contents = [
908 line_contents = [
1036 _context_line(line) for line in context_lines
909 _context_line(line) for line in context_lines
1037 if _is_diff_content(line)]
910 if _is_diff_content(line)
911 ]
912
1038 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
913 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1039 # Once they are fixed, we can drop this line here.
914 # Once they are fixed, we can drop this line here.
1040 if line_contents:
915 if line_contents:
1041 line_contents[-1] = (
916 line_contents[-1] = (
1042 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
917 line_contents[-1][0], line_contents[-1][1].rstrip(b'\n') + b'\n')
1043 return line_contents
918 return line_contents
1044
919
1045 def find_context(self, path, context, offset=0):
920 def find_context(self, path, context, offset=0):
@@ -1062,8 +937,10 b' class DiffProcessor(object):'
1062 file_diff = self._get_file_diff(path)
937 file_diff = self._get_file_diff(path)
1063
938
1064 for chunk in file_diff['chunks']:
939 for chunk in file_diff['chunks']:
940 if not isinstance(chunk, dict):
941 continue
1065 context_iter = iter(context)
942 context_iter = iter(context)
1066 for line_idx, line in enumerate(chunk):
943 for line_idx, line in enumerate(chunk['lines']):
1067 try:
944 try:
1068 if _context_line(line) == next(context_iter):
945 if _context_line(line) == next(context_iter):
1069 continue
946 continue
@@ -1081,7 +958,7 b' class DiffProcessor(object):'
1081
958
1082 effective_offset = len(context) - offset
959 effective_offset = len(context) - offset
1083 found_at_diff_lines = [
960 found_at_diff_lines = [
1084 _line_to_diff_line_number(chunk[idx - effective_offset])
961 _line_to_diff_line_number(chunk['lines'][idx - effective_offset])
1085 for idx, chunk in matches]
962 for idx, chunk in matches]
1086
963
1087 return found_at_diff_lines
964 return found_at_diff_lines
@@ -1091,18 +968,19 b' class DiffProcessor(object):'
1091 if file_diff['filename'] == path:
968 if file_diff['filename'] == path:
1092 break
969 break
1093 else:
970 else:
1094 raise FileNotInDiffException("File {} not in diff".format(path))
971 raise FileNotInDiffException(f"File {path} not in diff")
1095 return file_diff
972 return file_diff
1096
973
1097 def _find_chunk_line_index(self, file_diff, diff_line):
974 def _find_chunk_line_index(self, file_diff, diff_line):
1098 for chunk in file_diff['chunks']:
975 for chunk in file_diff['chunks']:
1099 for idx, line in enumerate(chunk):
976 if not isinstance(chunk, dict):
1100 if line['old_lineno'] == diff_line.old:
977 continue
1101 return chunk, idx
978 for line_idx, line in enumerate(chunk['lines']):
1102 if line['new_lineno'] == diff_line.new:
979 if diff_line.old and line['old_lineno'] == diff_line.old:
1103 return chunk, idx
980 return chunk, line_idx
1104 raise LineNotInDiffException(
981 if diff_line.new and line['new_lineno'] == diff_line.new:
1105 "The line {} is not part of the diff.".format(diff_line))
982 return chunk, line_idx
983 raise LineNotInDiffException(f"The line {diff_line} is not part of the diff.")
1106
984
1107
985
1108 def _is_diff_content(line):
986 def _is_diff_content(line):
@@ -1111,10 +989,7 b' def _is_diff_content(line):'
1111
989
1112
990
1113 def _context_line(line):
991 def _context_line(line):
1114 return (line['action'], line['line'])
992 return line['action'], line['line']
1115
1116
1117 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1118
993
1119
994
1120 def _line_to_diff_line_number(line):
995 def _line_to_diff_line_number(line):
@@ -1188,7 +1063,7 b' def cache_diff(cached_diff_file, diff, c'
1188 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1063 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1189 pickle.dump(struct, f)
1064 pickle.dump(struct, f)
1190 except Exception:
1065 except Exception:
1191 log.warn('Failed to save cache', exc_info=True)
1066 log.warning('Failed to save cache', exc_info=True)
1192 _cleanup_cache_file(cached_diff_file)
1067 _cleanup_cache_file(cached_diff_file)
1193
1068
1194 log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
1069 log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
@@ -1222,7 +1097,7 b' def load_cached_diff(cached_diff_file):'
1222 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1097 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1223 data = pickle.load(f)
1098 data = pickle.load(f)
1224 except Exception:
1099 except Exception:
1225 log.warn('Failed to read diff cache file', exc_info=True)
1100 log.warning('Failed to read diff cache file', exc_info=True)
1226
1101
1227 if not data:
1102 if not data:
1228 data = default_struct
1103 data = default_struct
@@ -1254,7 +1129,7 b' def generate_diff_cache_key(*args):'
1254 return input_param or None # prevent empty string arguments
1129 return input_param or None # prevent empty string arguments
1255
1130
1256 return '_'.join([
1131 return '_'.join([
1257 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1132 '{}' for _i in range(len(args))]).format(*list(map(arg_mapper, args)))
1258
1133
1259
1134
1260 def diff_cache_exist(cache_storage, *args):
1135 def diff_cache_exist(cache_storage, *args):
@@ -1266,6 +1141,6 b' def diff_cache_exist(cache_storage, *arg'
1266 cache_file_path = os.path.join(cache_storage, cache_key)
1141 cache_file_path = os.path.join(cache_storage, cache_key)
1267 # prevent path traversal attacks using some param that have e.g '../../'
1142 # prevent path traversal attacks using some param that have e.g '../../'
1268 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1143 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1269 raise ValueError('Final path must be within {}'.format(cache_storage))
1144 raise ValueError(f'Final path must be within {cache_storage}')
1270
1145
1271 return cache_file_path
1146 return cache_file_path
General Comments 0
You need to be logged in to leave comments. Login now