Show More
@@ -28,7 +28,9 b' import math' | |||
|
28 | 28 | import re |
|
29 | 29 | import sys |
|
30 | 30 | import time |
|
31 |
import urllib.request |
|
|
31 | import urllib.request | |
|
32 | import urllib.parse | |
|
33 | import urllib.error | |
|
32 | 34 | |
|
33 | 35 | |
|
34 | 36 | class diff_match_patch: |
@@ -1929,7 +1931,7 b' class diff_match_patch:' | |||
|
1929 | 1931 | return patches |
|
1930 | 1932 | text = textline.split("\n") |
|
1931 | 1933 | while len(text) != 0: |
|
1932 | m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) | |
|
1934 | m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) | |
|
1933 | 1935 | if not m: |
|
1934 | 1936 | raise ValueError("Invalid patch string: " + text[0]) |
|
1935 | 1937 | patch = patch_obj() |
@@ -22,14 +22,13 b'' | |||
|
22 | 22 | """ |
|
23 | 23 | Set of diffing helpers, previously part of vcs |
|
24 | 24 | """ |
|
25 | ||
|
25 | import dataclasses | |
|
26 | 26 | import os |
|
27 | 27 | import re |
|
28 | 28 | import bz2 |
|
29 | 29 | import gzip |
|
30 | 30 | import time |
|
31 | 31 | |
|
32 | import collections | |
|
33 | 32 | import difflib |
|
34 | 33 | import logging |
|
35 | 34 | import pickle |
@@ -37,7 +36,8 b' from itertools import tee' | |||
|
37 | 36 | |
|
38 | 37 | from rhodecode.lib.vcs.exceptions import VCSError |
|
39 | 38 | from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode |
|
40 |
from rhodecode.lib. |
|
|
39 | from rhodecode.lib.vcs.backends import base | |
|
40 | from rhodecode.lib.str_utils import safe_str | |
|
41 | 41 | |
|
42 | 42 | log = logging.getLogger(__name__) |
|
43 | 43 | |
@@ -55,10 +55,21 b' def get_diff_whitespace_flag(request):' | |||
|
55 | 55 | return request.GET.get('ignorews', '') == '1' |
|
56 | 56 | |
|
57 | 57 | |
|
58 | class OPS(object): | |
|
59 | ADD = 'A' | |
|
60 |
|
|
|
61 |
|
|
|
58 | @dataclasses.dataclass | |
|
59 | class OPS: | |
|
60 | ADD: str = 'A' | |
|
61 | MOD: str = 'M' | |
|
62 | DEL: str = 'D' | |
|
63 | ||
|
64 | ||
|
65 | @dataclasses.dataclass | |
|
66 | class DiffLineNumber: | |
|
67 | old: int | None | |
|
68 | new: int | None | |
|
69 | ||
|
70 | def __iter__(self): | |
|
71 | yield self.old | |
|
72 | yield self.new | |
|
62 | 73 | |
|
63 | 74 | |
|
64 | 75 | def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3): |
@@ -79,9 +90,7 b' def get_gitdiff(filenode_old, filenode_n' | |||
|
79 | 90 | |
|
80 | 91 | for filenode in (filenode_old, filenode_new): |
|
81 | 92 | if not isinstance(filenode, FileNode): |
|
82 | raise VCSError( | |
|
83 | "Given object should be FileNode object, not %s" | |
|
84 | % filenode.__class__) | |
|
93 | raise VCSError(f"Given object should be FileNode object, not {filenode.__class__}") | |
|
85 | 94 | |
|
86 | 95 | repo = filenode_new.commit.repository |
|
87 | 96 | old_commit = filenode_old.commit or repo.EMPTY_COMMIT |
@@ -103,7 +112,7 b' BIN_FILENODE = 7' | |||
|
103 | 112 | |
|
104 | 113 | class LimitedDiffContainer(object): |
|
105 | 114 | |
|
106 | def __init__(self, diff_limit, cur_diff_size, diff): | |
|
115 | def __init__(self, diff_limit: int, cur_diff_size, diff): | |
|
107 | 116 | self.diff = diff |
|
108 | 117 | self.diff_limit = diff_limit |
|
109 | 118 | self.cur_diff_size = cur_diff_size |
@@ -132,9 +141,9 b' class Action(object):' | |||
|
132 | 141 | |
|
133 | 142 | class DiffProcessor(object): |
|
134 | 143 | """ |
|
135 | Give it a unified or git diff and it returns a list of the files that were | |
|
144 | Give it a unified or git diff, and it returns a list of the files that were | |
|
136 | 145 | mentioned in the diff together with a dict of meta information that |
|
137 | can be used to render it in a HTML template. | |
|
146 | can be used to render it in an HTML template. | |
|
138 | 147 | |
|
139 | 148 | .. note:: Unicode handling |
|
140 | 149 | |
@@ -143,26 +152,26 b' class DiffProcessor(object):' | |||
|
143 | 152 | since the result is intended for presentation to the user. |
|
144 | 153 | |
|
145 | 154 | """ |
|
146 | _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)') | |
|
147 | _newline_marker = re.compile(r'^\\ No newline at end of file') | |
|
155 | _chunk_re = re.compile(br'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)') | |
|
156 | _newline_marker = re.compile(br'^\\ No newline at end of file') | |
|
148 | 157 | |
|
149 | 158 | # used for inline highlighter word split |
|
150 | _token_re = re.compile(r'()(>|<|&|\W+?)') | |
|
159 | _token_re = re.compile(br'()(>|<|&|\W+?)') | |
|
151 | 160 | |
|
152 | 161 | # collapse ranges of commits over given number |
|
153 | 162 | _collapse_commits_over = 5 |
|
154 | 163 | |
|
155 |
def __init__(self, diff, format='gitdiff', diff_limit= |
|
|
156 |
file_limit= |
|
|
164 | def __init__(self, diff: base.Diff, diff_format='gitdiff', diff_limit: int = 0, | |
|
165 | file_limit: int = 0, show_full_diff=True): | |
|
157 | 166 | """ |
|
158 | 167 | :param diff: A `Diff` object representing a diff from a vcs backend |
|
159 | :param format: format of diff passed, `udiff` or `gitdiff` | |
|
168 | :param diff_format: format of diff passed, `udiff` or `gitdiff` | |
|
160 | 169 | :param diff_limit: define the size of diff that is considered "big" |
|
161 | 170 | based on that parameter cut off will be triggered, set to None |
|
162 | 171 | to show full diff |
|
163 | 172 | """ |
|
164 | 173 | self._diff = diff |
|
165 | self._format = format | |
|
174 | self._format = diff_format | |
|
166 | 175 | self.adds = 0 |
|
167 | 176 | self.removes = 0 |
|
168 | 177 | # calculate diff size |
@@ -173,13 +182,14 b' class DiffProcessor(object):' | |||
|
173 | 182 | self.parsed = False |
|
174 | 183 | self.parsed_diff = [] |
|
175 | 184 | |
|
176 | log.debug('Initialized DiffProcessor with %s mode', format) | |
|
177 | if format == 'gitdiff': | |
|
185 | log.debug('Initialized DiffProcessor with %s mode', diff_format) | |
|
186 | self.differ = self._highlight_line_udiff | |
|
187 | self._parser = self._new_parse_gitdiff | |
|
188 | ||
|
189 | if diff_format == 'gitdiff': | |
|
178 | 190 | self.differ = self._highlight_line_difflib |
|
179 | 191 | self._parser = self._parse_gitdiff |
|
180 | else: | |
|
181 | self.differ = self._highlight_line_udiff | |
|
182 | self._parser = self._new_parse_gitdiff | |
|
192 | raise DeprecationWarning('gitdiff usage is deprecated') | |
|
183 | 193 | |
|
184 | 194 | def _copy_iterator(self): |
|
185 | 195 | """ |
@@ -190,33 +200,33 b' class DiffProcessor(object):' | |||
|
190 | 200 | self.__udiff, iterator_copy = tee(self.__udiff) |
|
191 | 201 | return iterator_copy |
|
192 | 202 | |
|
193 | def _escaper(self, string): | |
|
203 | def _escaper(self, diff_string): | |
|
194 | 204 | """ |
|
195 | 205 | Escaper for diff escapes special chars and checks the diff limit |
|
196 | 206 | |
|
197 | 207 | :param string: |
|
198 | 208 | """ |
|
199 | self.cur_diff_size += len(string) | |
|
209 | self.cur_diff_size += len(diff_string) | |
|
200 | 210 | |
|
201 | 211 | if not self.show_full_diff and (self.cur_diff_size > self.diff_limit): |
|
202 | 212 | raise DiffLimitExceeded('Diff Limit Exceeded') |
|
203 | 213 | |
|
204 | return string \ | |
|
205 | .replace('&', '&')\ | |
|
206 | .replace('<', '<')\ | |
|
207 | .replace('>', '>') | |
|
214 | return diff_string \ | |
|
215 | .replace(b'&', b'&')\ | |
|
216 | .replace(b'<', b'<')\ | |
|
217 | .replace(b'>', b'>') | |
|
208 | 218 | |
|
209 | def _line_counter(self, l): | |
|
219 | def _line_counter(self, diff_line): | |
|
210 | 220 | """ |
|
211 | 221 | Checks each line and bumps total adds/removes for this diff |
|
212 | 222 | |
|
213 | :param l: | |
|
223 | :param diff_line: | |
|
214 | 224 | """ |
|
215 | if l.startswith('+') and not l.startswith('+++'): | |
|
225 | if diff_line.startswith(b'+') and not diff_line.startswith(b'+++'): | |
|
216 | 226 | self.adds += 1 |
|
217 | elif l.startswith('-') and not l.startswith('---'): | |
|
227 | elif diff_line.startswith(b'-') and not diff_line.startswith(b'---'): | |
|
218 | 228 | self.removes += 1 |
|
219 |
return |
|
|
229 | return diff_line | |
|
220 | 230 | |
|
221 | 231 | def _highlight_line_difflib(self, line, next_): |
|
222 | 232 | """ |
@@ -238,9 +248,9 b' class DiffProcessor(object):' | |||
|
238 | 248 | newfrag = ''.join(newwords[j1:j2]) |
|
239 | 249 | if tag != 'equal': |
|
240 | 250 | if oldfrag: |
|
241 |
oldfrag = '<del> |
|
|
251 | oldfrag = f'<del>{oldfrag}</del>' | |
|
242 | 252 | if newfrag: |
|
243 |
newfrag = '<ins> |
|
|
253 | newfrag = f'<ins>{newfrag}</ins>' | |
|
244 | 254 | oldfragments.append(oldfrag) |
|
245 | 255 | newfragments.append(newfrag) |
|
246 | 256 | |
@@ -267,17 +277,11 b' class DiffProcessor(object):' | |||
|
267 | 277 | tag = 'ins' |
|
268 | 278 | else: |
|
269 | 279 | tag = 'del' |
|
270 | l['line'] = '%s<%s>%s</%s>%s' % ( | |
|
271 | l['line'][:start], | |
|
272 | tag, | |
|
273 | l['line'][start:last], | |
|
274 | tag, | |
|
275 | l['line'][last:] | |
|
276 | ) | |
|
280 | l['line'] = f"{l['line'][:start]}<{tag}>{l['line'][start:last]}</{tag}>{l['line'][last:]}" | |
|
277 | 281 | do(line) |
|
278 | 282 | do(next_) |
|
279 | 283 | |
|
280 | def _clean_line(self, line, command): | |
|
284 | def _clean_line(self, line, command: str): | |
|
281 | 285 | if command in ['+', '-', ' ']: |
|
282 | 286 | # only modify the line if it's actually a diff thing |
|
283 | 287 | line = line[1:] |
@@ -285,7 +289,9 b' class DiffProcessor(object):' | |||
|
285 | 289 | |
|
286 | 290 | def _parse_gitdiff(self, inline_diff=True): |
|
287 | 291 | _files = [] |
|
288 | diff_container = lambda arg: arg | |
|
292 | ||
|
293 | def diff_container(arg): | |
|
294 | return arg | |
|
289 | 295 | |
|
290 | 296 | for chunk in self._diff.chunks(): |
|
291 | 297 | head = chunk.header |
@@ -311,30 +317,24 b' class DiffProcessor(object):' | |||
|
311 | 317 | elif head['new_file_mode']: |
|
312 | 318 | op = OPS.ADD |
|
313 | 319 | stats['binary'] = True |
|
314 |
stats['ops'][NEW_FILENODE] = |
|
|
315 | else: # modify operation, can be copy, rename or chmod | |
|
320 | stats['ops'][NEW_FILENODE] = f"new file {safe_str(head['new_file_mode'])}" | |
|
321 | else: # modify operation, can be: copy, rename or chmod | |
|
316 | 322 | |
|
317 | 323 | # CHMOD |
|
318 | 324 | if head['new_mode'] and head['old_mode']: |
|
319 | 325 | op = OPS.MOD |
|
320 | 326 | stats['binary'] = True |
|
321 | stats['ops'][CHMOD_FILENODE] = ( | |
|
322 | 'modified file chmod %s => %s' % ( | |
|
323 | head['old_mode'], head['new_mode'])) | |
|
327 | stats['ops'][CHMOD_FILENODE] = f"modified file chmod {safe_str(head['old_mode'])} => {safe_str(head['new_mode'])}" | |
|
324 | 328 | # RENAME |
|
325 | 329 | if head['rename_from'] != head['rename_to']: |
|
326 | 330 | op = OPS.MOD |
|
327 | 331 | stats['binary'] = True |
|
328 | stats['ops'][RENAMED_FILENODE] = ( | |
|
329 | 'file renamed from %s to %s' % ( | |
|
330 | head['rename_from'], head['rename_to'])) | |
|
332 | stats['ops'][RENAMED_FILENODE] = f"file renamed from {safe_str(head['rename_from'])} to {safe_str(head['rename_to'])}" | |
|
331 | 333 | # COPY |
|
332 | 334 | if head.get('copy_from') and head.get('copy_to'): |
|
333 | 335 | op = OPS.MOD |
|
334 | 336 | stats['binary'] = True |
|
335 | stats['ops'][COPIED_FILENODE] = ( | |
|
336 | 'file copied from %s to %s' % ( | |
|
337 | head['copy_from'], head['copy_to'])) | |
|
337 | stats['ops'][COPIED_FILENODE] = f"file copied from {safe_str(head['copy_from'])} to {safe_str(head['copy_to'])}" | |
|
338 | 338 | |
|
339 | 339 | # If our new parsed headers didn't match anything fallback to |
|
340 | 340 | # old style detection |
@@ -376,9 +376,8 b' class DiffProcessor(object):' | |||
|
376 | 376 | raise DiffLimitExceeded('File Limit Exceeded') |
|
377 | 377 | |
|
378 | 378 | except DiffLimitExceeded: |
|
379 |
diff_container |
|
|
380 | LimitedDiffContainer( | |
|
381 | self.diff_limit, self.cur_diff_size, _diff) | |
|
379 | def diff_container(_diff): | |
|
380 | return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff) | |
|
382 | 381 | |
|
383 | 382 | exceeds_limit = len(raw_diff) > self.file_limit |
|
384 | 383 | limited_diff = True |
@@ -387,7 +386,7 b' class DiffProcessor(object):' | |||
|
387 | 386 | else: # GIT format binary patch, or possibly empty diff |
|
388 | 387 | if head['bin_patch']: |
|
389 | 388 | # we have operation already extracted, but we mark simply |
|
390 | # it's a diff we wont show for binary files | |
|
389 | # it's a diff we won't show for binary files | |
|
391 | 390 | stats['ops'][BIN_FILENODE] = 'binary diff hidden' |
|
392 | 391 | chunks = [] |
|
393 | 392 | |
@@ -397,31 +396,33 b' class DiffProcessor(object):' | |||
|
397 | 396 | # to see the content of the file |
|
398 | 397 | chunks = [] |
|
399 | 398 | |
|
400 |
|
|
|
399 | frag = [{ | |
|
401 | 400 |
|
|
402 | 401 |
|
|
403 | 402 |
|
|
404 | 403 |
|
|
405 |
|
|
|
406 |
|
|
|
404 | } for _op, msg in list(stats['ops'].items()) | |
|
405 | if _op not in [MOD_FILENODE]] | |
|
406 | ||
|
407 | chunks.insert(0, frag) | |
|
407 | 408 | |
|
408 | 409 | _files.append({ |
|
409 |
'filename': safe_ |
|
|
410 | 'filename': safe_str(head['b_path']), | |
|
410 | 411 | 'old_revision': head['a_blob_id'], |
|
411 | 412 | 'new_revision': head['b_blob_id'], |
|
412 | 413 | 'chunks': chunks, |
|
413 |
'raw_diff': safe_ |
|
|
414 | 'raw_diff': safe_str(raw_diff), | |
|
414 | 415 | 'operation': op, |
|
415 | 416 | 'stats': stats, |
|
416 | 417 | 'exceeds_limit': exceeds_limit, |
|
417 | 418 | 'is_limited_diff': limited_diff, |
|
418 | 419 | }) |
|
419 | 420 | |
|
420 | sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1, | |
|
421 |
|
|
|
421 | def operation_sorter(info): | |
|
422 | return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation']) | |
|
422 | 423 | |
|
423 | 424 | if not inline_diff: |
|
424 | return diff_container(sorted(_files, key=sorter)) | |
|
425 | return diff_container(sorted(_files, key=operation_sorter)) | |
|
425 | 426 | |
|
426 | 427 | # highlight inline changes |
|
427 | 428 | for diff_data in _files: |
@@ -440,24 +441,25 b' class DiffProcessor(object):' | |||
|
440 | 441 | except StopIteration: |
|
441 | 442 | pass |
|
442 | 443 | |
|
443 | return diff_container(sorted(_files, key=sorter)) | |
|
444 | return diff_container(sorted(_files, key=operation_sorter)) | |
|
444 | 445 | |
|
445 | 446 | def _check_large_diff(self): |
|
446 | 447 | if self.diff_limit: |
|
447 | 448 | log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit) |
|
448 | 449 | if not self.show_full_diff and (self.cur_diff_size > self.diff_limit): |
|
449 |
raise DiffLimitExceeded('Diff Limit ` |
|
|
450 | raise DiffLimitExceeded(f'Diff Limit `{self.diff_limit}` Exceeded') | |
|
450 | 451 | |
|
451 | 452 | # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff |
|
452 | 453 | def _new_parse_gitdiff(self, inline_diff=True): |
|
453 | 454 | _files = [] |
|
454 | 455 | |
|
455 | # this can be overriden later to a LimitedDiffContainer type | |
|
456 |
diff_container |
|
|
456 | # this can be overridden later to a LimitedDiffContainer type | |
|
457 | def diff_container(arg): | |
|
458 | return arg | |
|
457 | 459 | |
|
458 | 460 | for chunk in self._diff.chunks(): |
|
459 | head = chunk.header | |
|
460 |
log.debug('parsing diff %r', |
|
|
461 | head = chunk.header_as_str | |
|
462 | log.debug('parsing diff chunk %r', chunk) | |
|
461 | 463 | |
|
462 | 464 | raw_diff = chunk.raw |
|
463 | 465 | limited_diff = False |
@@ -468,8 +470,8 b' class DiffProcessor(object):' | |||
|
468 | 470 | 'added': 0, |
|
469 | 471 | 'deleted': 0, |
|
470 | 472 | 'binary': False, |
|
471 |
'old_mode': |
|
|
472 |
'new_mode': |
|
|
473 | 'old_mode': '', | |
|
474 | 'new_mode': '', | |
|
473 | 475 | 'ops': {}, |
|
474 | 476 | } |
|
475 | 477 | if head['old_mode']: |
@@ -489,36 +491,30 b' class DiffProcessor(object):' | |||
|
489 | 491 | elif head['new_file_mode']: |
|
490 | 492 | op = OPS.ADD |
|
491 | 493 | stats['binary'] = True |
|
492 |
stats['old_mode'] = |
|
|
494 | stats['old_mode'] = '' | |
|
493 | 495 | stats['new_mode'] = head['new_file_mode'] |
|
494 |
stats['ops'][NEW_FILENODE] = |
|
|
496 | stats['ops'][NEW_FILENODE] = f"new file {head['new_file_mode']}" | |
|
495 | 497 | |
|
496 | # modify operation, can be copy, rename or chmod | |
|
498 | # modify operation, can be: copy, rename or chmod | |
|
497 | 499 | else: |
|
498 | 500 | # CHMOD |
|
499 | 501 | if head['new_mode'] and head['old_mode']: |
|
500 | 502 | op = OPS.MOD |
|
501 | 503 | stats['binary'] = True |
|
502 |
stats['ops'][CHMOD_FILENODE] = |
|
|
503 | 'modified file chmod %s => %s' % ( | |
|
504 | head['old_mode'], head['new_mode'])) | |
|
504 | stats['ops'][CHMOD_FILENODE] = f"modified file chmod {head['old_mode']} => {head['new_mode']}" | |
|
505 | 505 | |
|
506 | 506 | # RENAME |
|
507 | 507 | if head['rename_from'] != head['rename_to']: |
|
508 | 508 | op = OPS.MOD |
|
509 | 509 | stats['binary'] = True |
|
510 | 510 | stats['renamed'] = (head['rename_from'], head['rename_to']) |
|
511 |
stats['ops'][RENAMED_FILENODE] = |
|
|
512 | 'file renamed from %s to %s' % ( | |
|
513 | head['rename_from'], head['rename_to'])) | |
|
511 | stats['ops'][RENAMED_FILENODE] = f"file renamed from {head['rename_from']} to {head['rename_to']}" | |
|
514 | 512 | # COPY |
|
515 | 513 | if head.get('copy_from') and head.get('copy_to'): |
|
516 | 514 | op = OPS.MOD |
|
517 | 515 | stats['binary'] = True |
|
518 | 516 | stats['copied'] = (head['copy_from'], head['copy_to']) |
|
519 |
stats['ops'][COPIED_FILENODE] = |
|
|
520 | 'file copied from %s to %s' % ( | |
|
521 | head['copy_from'], head['copy_to'])) | |
|
517 | stats['ops'][COPIED_FILENODE] = f"file copied from {head['copy_from']} to {head['copy_to']}" | |
|
522 | 518 | |
|
523 | 519 | # If our new parsed headers didn't match anything fallback to |
|
524 | 520 | # old style detection |
@@ -558,9 +554,8 b' class DiffProcessor(object):' | |||
|
558 | 554 | # but the browser is the bottleneck. |
|
559 | 555 | if not self.show_full_diff and exceeds_limit: |
|
560 | 556 | log.debug('File `%s` exceeds current file_limit of %s', |
|
561 |
|
|
|
562 | raise DiffLimitExceeded( | |
|
563 | 'File Limit %s Exceeded', self.file_limit) | |
|
557 | head['b_path'], self.file_limit) | |
|
558 | raise DiffLimitExceeded(f'File Limit {self.file_limit} Exceeded') | |
|
564 | 559 | |
|
565 | 560 | self._check_large_diff() |
|
566 | 561 | |
@@ -573,9 +568,11 b' class DiffProcessor(object):' | |||
|
573 | 568 | stats['ops'][MOD_FILENODE] = 'modified file' |
|
574 | 569 | |
|
575 | 570 | except DiffLimitExceeded: |
|
576 |
d |
|
|
577 | LimitedDiffContainer( | |
|
578 | self.diff_limit, self.cur_diff_size, _diff) | |
|
571 | def limited_diff_container(_diff): | |
|
572 | return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff) | |
|
573 | ||
|
574 | # re-definition of our container wrapper | |
|
575 | diff_container = limited_diff_container | |
|
579 | 576 | |
|
580 | 577 | limited_diff = True |
|
581 | 578 | chunks = [] |
@@ -583,7 +580,7 b' class DiffProcessor(object):' | |||
|
583 | 580 | else: # GIT format binary patch, or possibly empty diff |
|
584 | 581 | if head['bin_patch']: |
|
585 | 582 | # we have operation already extracted, but we mark simply |
|
586 | # it's a diff we wont show for binary files | |
|
583 | # it's a diff we won't show for binary files | |
|
587 | 584 | stats['ops'][BIN_FILENODE] = 'binary diff hidden' |
|
588 | 585 | chunks = [] |
|
589 | 586 | |
@@ -594,31 +591,32 b' class DiffProcessor(object):' | |||
|
594 | 591 | # to see the content of the file |
|
595 | 592 | chunks = [] |
|
596 | 593 | |
|
597 | chunks.insert( | |
|
598 |
|
|
|
594 | frag = [ | |
|
595 | {'old_lineno': '', | |
|
599 | 596 |
|
|
600 | 597 |
|
|
601 | 598 |
|
|
602 |
|
|
|
603 |
|
|
|
599 | } for _op, msg in list(stats['ops'].items()) | |
|
600 | if _op not in [MOD_FILENODE]] | |
|
604 | 601 | |
|
605 | original_filename = safe_unicode(head['a_path']) | |
|
602 | chunks.insert(0, frag) | |
|
603 | ||
|
604 | original_filename = safe_str(head['a_path']) | |
|
606 | 605 | _files.append({ |
|
607 | 606 | 'original_filename': original_filename, |
|
608 |
'filename': safe_ |
|
|
607 | 'filename': safe_str(head['b_path']), | |
|
609 | 608 | 'old_revision': head['a_blob_id'], |
|
610 | 609 | 'new_revision': head['b_blob_id'], |
|
611 | 610 | 'chunks': chunks, |
|
612 |
'raw_diff': safe_ |
|
|
611 | 'raw_diff': safe_str(raw_diff), | |
|
613 | 612 | 'operation': op, |
|
614 | 613 | 'stats': stats, |
|
615 | 614 | 'exceeds_limit': exceeds_limit, |
|
616 | 615 | 'is_limited_diff': limited_diff, |
|
617 | 616 | }) |
|
618 | 617 | |
|
619 | sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1, | |
|
620 |
|
|
|
621 | ||
|
618 | def sorter(info): | |
|
619 | return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation']) | |
|
622 | 620 | return diff_container(sorted(_files, key=sorter)) |
|
623 | 621 | |
|
624 | 622 | # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines |
@@ -667,20 +665,20 b' class DiffProcessor(object):' | |||
|
667 | 665 | line = next(diff_iter) |
|
668 | 666 | |
|
669 | 667 | while old_line < old_end or new_line < new_end: |
|
670 | command = ' ' | |
|
668 | command = b' ' | |
|
671 | 669 | if line: |
|
672 | 670 | command = line[0] |
|
673 | 671 | |
|
674 | 672 | affects_old = affects_new = False |
|
675 | 673 | |
|
676 | 674 | # ignore those if we don't expect them |
|
677 | if command in '#@': | |
|
675 | if command in b'#@': | |
|
678 | 676 | continue |
|
679 | elif command == '+': | |
|
677 | elif command == b'+': | |
|
680 | 678 | affects_new = True |
|
681 | 679 | action = Action.ADD |
|
682 | 680 | stats[0] += 1 |
|
683 | elif command == '-': | |
|
681 | elif command == b'-': | |
|
684 | 682 | affects_old = True |
|
685 | 683 | action = Action.DELETE |
|
686 | 684 | stats[1] += 1 |
@@ -692,8 +690,8 b' class DiffProcessor(object):' | |||
|
692 | 690 | old_line += affects_old |
|
693 | 691 | new_line += affects_new |
|
694 | 692 | lines.append({ |
|
695 | 'old_lineno': affects_old and old_line or '', | |
|
696 | 'new_lineno': affects_new and new_line or '', | |
|
693 | 'old_lineno': affects_old and old_line or b'', | |
|
694 | 'new_lineno': affects_new and new_line or b'', | |
|
697 | 695 | 'action': action, |
|
698 | 696 | 'line': self._clean_line(line, command) |
|
699 | 697 | }) |
@@ -727,6 +725,7 b' class DiffProcessor(object):' | |||
|
727 | 725 | |
|
728 | 726 | try: |
|
729 | 727 | line = next(diff_iter) |
|
728 | assert isinstance(line, bytes) | |
|
730 | 729 | |
|
731 | 730 | while line: |
|
732 | 731 | raw_diff.append(line) |
@@ -737,6 +736,7 b' class DiffProcessor(object):' | |||
|
737 | 736 | break |
|
738 | 737 | |
|
739 | 738 | gr = match.groups() |
|
739 | ||
|
740 | 740 | (old_line, old_end, |
|
741 | 741 | new_line, new_end) = [int(x or 1) for x in gr[:-1]] |
|
742 | 742 | |
@@ -754,7 +754,7 b' class DiffProcessor(object):' | |||
|
754 | 754 | old_line -= 1 |
|
755 | 755 | new_line -= 1 |
|
756 | 756 | |
|
757 |
|
|
|
757 | len(gr) == 5 | |
|
758 | 758 | old_end += old_line |
|
759 | 759 | new_end += new_line |
|
760 | 760 | |
@@ -763,7 +763,8 b' class DiffProcessor(object):' | |||
|
763 | 763 | while old_line < old_end or new_line < new_end: |
|
764 | 764 | command = ' ' |
|
765 | 765 | if line: |
|
766 | command = line[0] | |
|
766 | # This is bytes, so we need to convert it to a str | |
|
767 | command: str = chr(line[0]) | |
|
767 | 768 | |
|
768 | 769 | affects_old = affects_new = False |
|
769 | 770 | |
@@ -786,8 +787,8 b' class DiffProcessor(object):' | |||
|
786 | 787 | old_line += affects_old |
|
787 | 788 | new_line += affects_new |
|
788 | 789 | lines.append({ |
|
789 |
'old_lineno': affects_old and old_line or |
|
|
790 |
'new_lineno': affects_new and new_line or |
|
|
790 | 'old_lineno': affects_old and old_line or None, | |
|
791 | 'new_lineno': affects_new and new_line or None, | |
|
791 | 792 | 'action': action, |
|
792 | 793 | 'line': self._clean_line(line, command) |
|
793 | 794 | }) |
@@ -815,7 +816,7 b' class DiffProcessor(object):' | |||
|
815 | 816 | except StopIteration: |
|
816 | 817 | pass |
|
817 | 818 | |
|
818 | return ''.join(raw_diff), chunks, stats | |
|
819 | return b''.join(raw_diff), chunks, stats | |
|
819 | 820 | |
|
820 | 821 | def _safe_id(self, idstring): |
|
821 | 822 | """Make a string safe for including in an id attribute. |
@@ -833,24 +834,24 b' class DiffProcessor(object):' | |||
|
833 | 834 | |
|
834 | 835 | """ |
|
835 | 836 | # Transform all whitespace to underscore |
|
836 |
idstring = re.sub(r'\s', "_", ' |
|
|
837 | idstring = re.sub(r'\s', "_", f'{idstring}') | |
|
837 | 838 | # Remove everything that is not a hyphen or a member of \w |
|
838 | 839 | idstring = re.sub(r'(?!-)\W', "", idstring).lower() |
|
839 | 840 | return idstring |
|
840 | 841 | |
|
841 | 842 | @classmethod |
|
842 | def diff_splitter(cls, string): | |
|
843 | def diff_splitter(cls, diff_string: bytes): | |
|
843 | 844 | """ |
|
844 | 845 | Diff split that emulates .splitlines() but works only on \n |
|
845 | 846 | """ |
|
846 | if not string: | |
|
847 | if not diff_string: | |
|
847 | 848 | return |
|
848 | elif string == '\n': | |
|
849 | yield '\n' | |
|
849 | elif diff_string == b'\n': | |
|
850 | yield b'\n' | |
|
850 | 851 | else: |
|
851 | 852 | |
|
852 | has_newline = string.endswith('\n') | |
|
853 | elements = string.split('\n') | |
|
853 | has_newline = diff_string.endswith(b'\n') | |
|
854 | elements = diff_string.split(b'\n') | |
|
854 | 855 | if has_newline: |
|
855 | 856 | # skip last element as it's empty string from newlines |
|
856 | 857 | elements = elements[:-1] |
@@ -860,9 +861,9 b' class DiffProcessor(object):' | |||
|
860 | 861 | for cnt, line in enumerate(elements, start=1): |
|
861 | 862 | last_line = cnt == len_elements |
|
862 | 863 | if last_line and not has_newline: |
|
863 |
yield |
|
|
864 | yield line | |
|
864 | 865 | else: |
|
865 |
yield |
|
|
866 | yield line + b'\n' | |
|
866 | 867 | |
|
867 | 868 | def prepare(self, inline_diff=True): |
|
868 | 869 | """ |
@@ -879,132 +880,7 b' class DiffProcessor(object):' | |||
|
879 | 880 | """ |
|
880 | 881 | Returns raw diff as a byte string |
|
881 | 882 | """ |
|
882 | return self._diff.raw | |
|
883 | ||
|
884 | def as_html(self, table_class='code-difftable', line_class='line', | |
|
885 | old_lineno_class='lineno old', new_lineno_class='lineno new', | |
|
886 | code_class='code', enable_comments=False, parsed_lines=None): | |
|
887 | """ | |
|
888 | Return given diff as html table with customized css classes | |
|
889 | """ | |
|
890 | # TODO(marcink): not sure how to pass in translator | |
|
891 | # here in an efficient way, leave the _ for proper gettext extraction | |
|
892 | _ = lambda s: s | |
|
893 | ||
|
894 | def _link_to_if(condition, label, url): | |
|
895 | """ | |
|
896 | Generates a link if condition is meet or just the label if not. | |
|
897 | """ | |
|
898 | ||
|
899 | if condition: | |
|
900 | return '''<a href="%(url)s" class="tooltip" | |
|
901 | title="%(title)s">%(label)s</a>''' % { | |
|
902 | 'title': _('Click to select line'), | |
|
903 | 'url': url, | |
|
904 | 'label': label | |
|
905 | } | |
|
906 | else: | |
|
907 | return label | |
|
908 | if not self.parsed: | |
|
909 | self.prepare() | |
|
910 | ||
|
911 | diff_lines = self.parsed_diff | |
|
912 | if parsed_lines: | |
|
913 | diff_lines = parsed_lines | |
|
914 | ||
|
915 | _html_empty = True | |
|
916 | _html = [] | |
|
917 | _html.append('''<table class="%(table_class)s">\n''' % { | |
|
918 | 'table_class': table_class | |
|
919 | }) | |
|
920 | ||
|
921 | for diff in diff_lines: | |
|
922 | for line in diff['chunks']: | |
|
923 | _html_empty = False | |
|
924 | for change in line: | |
|
925 | _html.append('''<tr class="%(lc)s %(action)s">\n''' % { | |
|
926 | 'lc': line_class, | |
|
927 | 'action': change['action'] | |
|
928 | }) | |
|
929 | anchor_old_id = '' | |
|
930 | anchor_new_id = '' | |
|
931 | anchor_old = "%(filename)s_o%(oldline_no)s" % { | |
|
932 | 'filename': self._safe_id(diff['filename']), | |
|
933 | 'oldline_no': change['old_lineno'] | |
|
934 | } | |
|
935 | anchor_new = "%(filename)s_n%(oldline_no)s" % { | |
|
936 | 'filename': self._safe_id(diff['filename']), | |
|
937 | 'oldline_no': change['new_lineno'] | |
|
938 | } | |
|
939 | cond_old = (change['old_lineno'] != '...' and | |
|
940 | change['old_lineno']) | |
|
941 | cond_new = (change['new_lineno'] != '...' and | |
|
942 | change['new_lineno']) | |
|
943 | if cond_old: | |
|
944 | anchor_old_id = 'id="%s"' % anchor_old | |
|
945 | if cond_new: | |
|
946 | anchor_new_id = 'id="%s"' % anchor_new | |
|
947 | ||
|
948 | if change['action'] != Action.CONTEXT: | |
|
949 | anchor_link = True | |
|
950 | else: | |
|
951 | anchor_link = False | |
|
952 | ||
|
953 | ########################################################### | |
|
954 | # COMMENT ICONS | |
|
955 | ########################################################### | |
|
956 | _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''') | |
|
957 | ||
|
958 | if enable_comments and change['action'] != Action.CONTEXT: | |
|
959 | _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''') | |
|
960 | ||
|
961 | _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''') | |
|
962 | ||
|
963 | ########################################################### | |
|
964 | # OLD LINE NUMBER | |
|
965 | ########################################################### | |
|
966 | _html.append('''\t<td %(a_id)s class="%(olc)s">''' % { | |
|
967 | 'a_id': anchor_old_id, | |
|
968 | 'olc': old_lineno_class | |
|
969 | }) | |
|
970 | ||
|
971 | _html.append('''%(link)s''' % { | |
|
972 | 'link': _link_to_if(anchor_link, change['old_lineno'], | |
|
973 | '#%s' % anchor_old) | |
|
974 | }) | |
|
975 | _html.append('''</td>\n''') | |
|
976 | ########################################################### | |
|
977 | # NEW LINE NUMBER | |
|
978 | ########################################################### | |
|
979 | ||
|
980 | _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % { | |
|
981 | 'a_id': anchor_new_id, | |
|
982 | 'nlc': new_lineno_class | |
|
983 | }) | |
|
984 | ||
|
985 | _html.append('''%(link)s''' % { | |
|
986 | 'link': _link_to_if(anchor_link, change['new_lineno'], | |
|
987 | '#%s' % anchor_new) | |
|
988 | }) | |
|
989 | _html.append('''</td>\n''') | |
|
990 | ########################################################### | |
|
991 | # CODE | |
|
992 | ########################################################### | |
|
993 | code_classes = [code_class] | |
|
994 | if (not enable_comments or | |
|
995 | change['action'] == Action.CONTEXT): | |
|
996 | code_classes.append('no-comment') | |
|
997 | _html.append('\t<td class="%s">' % ' '.join(code_classes)) | |
|
998 | _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % { | |
|
999 | 'code': change['line'] | |
|
1000 | }) | |
|
1001 | ||
|
1002 | _html.append('''\t</td>''') | |
|
1003 | _html.append('''\n</tr>\n''') | |
|
1004 | _html.append('''</table>''') | |
|
1005 | if _html_empty: | |
|
1006 | return None | |
|
1007 | return ''.join(_html) | |
|
883 | return self._diff.raw.tobytes() | |
|
1008 | 884 | |
|
1009 | 885 | def stat(self): |
|
1010 | 886 | """ |
@@ -1013,33 +889,32 b' class DiffProcessor(object):' | |||
|
1013 | 889 | return self.adds, self.removes |
|
1014 | 890 | |
|
1015 | 891 | def get_context_of_line( |
|
1016 | self, path, diff_line=None, context_before=3, context_after=3): | |
|
892 | self, path, diff_line: DiffLineNumber = None, context_before: int = 3, context_after: int = 3): | |
|
1017 | 893 | """ |
|
1018 | 894 | Returns the context lines for the specified diff line. |
|
1019 | ||
|
1020 | :type diff_line: :class:`DiffLineNumber` | |
|
1021 | 895 | """ |
|
1022 | 896 | assert self.parsed, "DiffProcessor is not initialized." |
|
1023 | 897 | |
|
1024 | 898 | if None not in diff_line: |
|
1025 | raise ValueError( | |
|
1026 | "Cannot specify both line numbers: {}".format(diff_line)) | |
|
899 | raise ValueError(f"Cannot specify both line numbers in diff_line: {diff_line}") | |
|
1027 | 900 | |
|
1028 | 901 | file_diff = self._get_file_diff(path) |
|
1029 | 902 | chunk, idx = self._find_chunk_line_index(file_diff, diff_line) |
|
1030 | 903 | |
|
1031 | 904 | first_line_to_include = max(idx - context_before, 0) |
|
1032 | 905 | first_line_after_context = idx + context_after + 1 |
|
1033 | context_lines = chunk[first_line_to_include:first_line_after_context] | |
|
906 | context_lines = chunk['lines'][first_line_to_include:first_line_after_context] | |
|
1034 | 907 | |
|
1035 | 908 | line_contents = [ |
|
1036 | 909 | _context_line(line) for line in context_lines |
|
1037 |
if _is_diff_content(line) |
|
|
910 | if _is_diff_content(line) | |
|
911 | ] | |
|
912 | ||
|
1038 | 913 | # TODO: johbo: Interim fixup, the diff chunks drop the final newline. |
|
1039 | 914 | # Once they are fixed, we can drop this line here. |
|
1040 | 915 | if line_contents: |
|
1041 | 916 | line_contents[-1] = ( |
|
1042 | line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n') | |
|
917 | line_contents[-1][0], line_contents[-1][1].rstrip(b'\n') + b'\n') | |
|
1043 | 918 | return line_contents |
|
1044 | 919 | |
|
1045 | 920 | def find_context(self, path, context, offset=0): |
@@ -1062,8 +937,10 b' class DiffProcessor(object):' | |||
|
1062 | 937 | file_diff = self._get_file_diff(path) |
|
1063 | 938 | |
|
1064 | 939 | for chunk in file_diff['chunks']: |
|
940 | if not isinstance(chunk, dict): | |
|
941 | continue | |
|
1065 | 942 | context_iter = iter(context) |
|
1066 | for line_idx, line in enumerate(chunk): | |
|
943 | for line_idx, line in enumerate(chunk['lines']): | |
|
1067 | 944 | try: |
|
1068 | 945 | if _context_line(line) == next(context_iter): |
|
1069 | 946 | continue |
@@ -1081,7 +958,7 b' class DiffProcessor(object):' | |||
|
1081 | 958 | |
|
1082 | 959 | effective_offset = len(context) - offset |
|
1083 | 960 | found_at_diff_lines = [ |
|
1084 | _line_to_diff_line_number(chunk[idx - effective_offset]) | |
|
961 | _line_to_diff_line_number(chunk['lines'][idx - effective_offset]) | |
|
1085 | 962 | for idx, chunk in matches] |
|
1086 | 963 | |
|
1087 | 964 | return found_at_diff_lines |
@@ -1091,18 +968,19 b' class DiffProcessor(object):' | |||
|
1091 | 968 | if file_diff['filename'] == path: |
|
1092 | 969 | break |
|
1093 | 970 | else: |
|
1094 |
raise FileNotInDiffException("File {} not in diff" |
|
|
971 | raise FileNotInDiffException(f"File {path} not in diff") | |
|
1095 | 972 | return file_diff |
|
1096 | 973 | |
|
1097 | 974 | def _find_chunk_line_index(self, file_diff, diff_line): |
|
1098 | 975 | for chunk in file_diff['chunks']: |
|
1099 |
f |
|
|
1100 | if line['old_lineno'] == diff_line.old: | |
|
1101 | return chunk, idx | |
|
1102 |
if line[' |
|
|
1103 | return chunk, idx | |
|
1104 | raise LineNotInDiffException( | |
|
1105 | "The line {} is not part of the diff.".format(diff_line)) | |
|
976 | if not isinstance(chunk, dict): | |
|
977 | continue | |
|
978 | for line_idx, line in enumerate(chunk['lines']): | |
|
979 | if diff_line.old and line['old_lineno'] == diff_line.old: | |
|
980 | return chunk, line_idx | |
|
981 | if diff_line.new and line['new_lineno'] == diff_line.new: | |
|
982 | return chunk, line_idx | |
|
983 | raise LineNotInDiffException(f"The line {diff_line} is not part of the diff.") | |
|
1106 | 984 | |
|
1107 | 985 | |
|
1108 | 986 | def _is_diff_content(line): |
@@ -1111,10 +989,7 b' def _is_diff_content(line):' | |||
|
1111 | 989 | |
|
1112 | 990 | |
|
1113 | 991 | def _context_line(line): |
|
1114 |
return |
|
|
1115 | ||
|
1116 | ||
|
1117 | DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new']) | |
|
992 | return line['action'], line['line'] | |
|
1118 | 993 | |
|
1119 | 994 | |
|
1120 | 995 | def _line_to_diff_line_number(line): |
@@ -1188,7 +1063,7 b' def cache_diff(cached_diff_file, diff, c' | |||
|
1188 | 1063 | with bz2.BZ2File(cached_diff_file, 'wb') as f: |
|
1189 | 1064 | pickle.dump(struct, f) |
|
1190 | 1065 | except Exception: |
|
1191 | log.warn('Failed to save cache', exc_info=True) | |
|
1066 | log.warning('Failed to save cache', exc_info=True) | |
|
1192 | 1067 | _cleanup_cache_file(cached_diff_file) |
|
1193 | 1068 | |
|
1194 | 1069 | log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start) |
@@ -1222,7 +1097,7 b' def load_cached_diff(cached_diff_file):' | |||
|
1222 | 1097 | with bz2.BZ2File(cached_diff_file, 'rb') as f: |
|
1223 | 1098 | data = pickle.load(f) |
|
1224 | 1099 | except Exception: |
|
1225 | log.warn('Failed to read diff cache file', exc_info=True) | |
|
1100 | log.warning('Failed to read diff cache file', exc_info=True) | |
|
1226 | 1101 | |
|
1227 | 1102 | if not data: |
|
1228 | 1103 | data = default_struct |
@@ -1254,7 +1129,7 b' def generate_diff_cache_key(*args):' | |||
|
1254 | 1129 | return input_param or None # prevent empty string arguments |
|
1255 | 1130 | |
|
1256 | 1131 | return '_'.join([ |
|
1257 | '{}' for i in range(len(args))]).format(*map(arg_mapper, args)) | |
|
1132 | '{}' for _i in range(len(args))]).format(*list(map(arg_mapper, args))) | |
|
1258 | 1133 | |
|
1259 | 1134 | |
|
1260 | 1135 | def diff_cache_exist(cache_storage, *args): |
@@ -1266,6 +1141,6 b' def diff_cache_exist(cache_storage, *arg' | |||
|
1266 | 1141 | cache_file_path = os.path.join(cache_storage, cache_key) |
|
1267 | 1142 | # prevent path traversal attacks using some param that have e.g '../../' |
|
1268 | 1143 | if not os.path.abspath(cache_file_path).startswith(cache_storage): |
|
1269 |
raise ValueError('Final path must be within {}' |
|
|
1144 | raise ValueError(f'Final path must be within {cache_storage}') | |
|
1270 | 1145 | |
|
1271 | 1146 | return cache_file_path |
General Comments 0
You need to be logged in to leave comments.
Login now