diff --git a/rhodecode/lib/diff_match_patch.py b/rhodecode/lib/diff_match_patch.py
--- a/rhodecode/lib/diff_match_patch.py
+++ b/rhodecode/lib/diff_match_patch.py
@@ -28,7 +28,9 @@ import math
import re
import sys
import time
-import urllib.request, urllib.parse, urllib.error
+import urllib.request
+import urllib.parse
+import urllib.error
class diff_match_patch:
@@ -168,7 +170,7 @@ class diff_match_patch:
diffs = [
(self.DIFF_INSERT, longtext[:i]),
(self.DIFF_EQUAL, shorttext),
- (self.DIFF_INSERT, longtext[i + len(shorttext) :]),
+ (self.DIFF_INSERT, longtext[i + len(shorttext):]),
]
# Swap insertions for deletions if diff is reversed.
if len(text1) > len(text2):
@@ -241,7 +243,7 @@ class diff_match_patch:
if count_delete >= 1 and count_insert >= 1:
# Delete the offending records and add the merged ones.
a = self.diff_main(text_delete, text_insert, False, deadline)
- diffs[pointer - count_delete - count_insert : pointer] = a
+ diffs[pointer - count_delete - count_insert: pointer] = a
pointer = pointer - count_delete - count_insert + len(a)
count_insert = 0
count_delete = 0
@@ -1929,7 +1931,7 @@ class diff_match_patch:
return patches
text = textline.split("\n")
while len(text) != 0:
- m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
+ m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
if not m:
raise ValueError("Invalid patch string: " + text[0])
patch = patch_obj()
diff --git a/rhodecode/lib/diffs.py b/rhodecode/lib/diffs.py
--- a/rhodecode/lib/diffs.py
+++ b/rhodecode/lib/diffs.py
@@ -22,14 +22,13 @@
"""
Set of diffing helpers, previously part of vcs
"""
-
+import dataclasses
import os
import re
import bz2
import gzip
import time
-import collections
import difflib
import logging
import pickle
@@ -37,7 +36,8 @@ from itertools import tee
from rhodecode.lib.vcs.exceptions import VCSError
from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
-from rhodecode.lib.utils2 import safe_unicode, safe_str
+from rhodecode.lib.vcs.backends import base
+from rhodecode.lib.str_utils import safe_str
log = logging.getLogger(__name__)
@@ -55,10 +55,21 @@ def get_diff_whitespace_flag(request):
return request.GET.get('ignorews', '') == '1'
-class OPS(object):
- ADD = 'A'
- MOD = 'M'
- DEL = 'D'
+@dataclasses.dataclass
+class OPS:
+ ADD: str = 'A'
+ MOD: str = 'M'
+ DEL: str = 'D'
+
+
+@dataclasses.dataclass
+class DiffLineNumber:
+ old: int | None
+ new: int | None
+
+ def __iter__(self):
+ yield self.old
+ yield self.new
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
@@ -79,9 +90,7 @@ def get_gitdiff(filenode_old, filenode_n
for filenode in (filenode_old, filenode_new):
if not isinstance(filenode, FileNode):
- raise VCSError(
- "Given object should be FileNode object, not %s"
- % filenode.__class__)
+ raise VCSError(f"Given object should be FileNode object, not {filenode.__class__}")
repo = filenode_new.commit.repository
old_commit = filenode_old.commit or repo.EMPTY_COMMIT
@@ -103,7 +112,7 @@ BIN_FILENODE = 7
class LimitedDiffContainer(object):
- def __init__(self, diff_limit, cur_diff_size, diff):
+ def __init__(self, diff_limit: int, cur_diff_size, diff):
self.diff = diff
self.diff_limit = diff_limit
self.cur_diff_size = cur_diff_size
@@ -132,9 +141,9 @@ class Action(object):
class DiffProcessor(object):
"""
- Give it a unified or git diff and it returns a list of the files that were
+ Give it a unified or git diff, and it returns a list of the files that were
mentioned in the diff together with a dict of meta information that
- can be used to render it in a HTML template.
+ can be used to render it in an HTML template.
.. note:: Unicode handling
@@ -143,26 +152,26 @@ class DiffProcessor(object):
since the result is intended for presentation to the user.
"""
- _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
- _newline_marker = re.compile(r'^\\ No newline at end of file')
+ _chunk_re = re.compile(br'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
+ _newline_marker = re.compile(br'^\\ No newline at end of file')
# used for inline highlighter word split
- _token_re = re.compile(r'()(>|<|&|\W+?)')
+ _token_re = re.compile(br'()(>|<|&|\W+?)')
# collapse ranges of commits over given number
_collapse_commits_over = 5
- def __init__(self, diff, format='gitdiff', diff_limit=None,
- file_limit=None, show_full_diff=True):
+ def __init__(self, diff: base.Diff, diff_format='gitdiff', diff_limit: int = 0,
+ file_limit: int = 0, show_full_diff=True):
"""
:param diff: A `Diff` object representing a diff from a vcs backend
- :param format: format of diff passed, `udiff` or `gitdiff`
+ :param diff_format: format of diff passed, `udiff` or `gitdiff`
:param diff_limit: define the size of diff that is considered "big"
based on that parameter cut off will be triggered, set to None
to show full diff
"""
self._diff = diff
- self._format = format
+ self._format = diff_format
self.adds = 0
self.removes = 0
# calculate diff size
@@ -173,13 +182,14 @@ class DiffProcessor(object):
self.parsed = False
self.parsed_diff = []
- log.debug('Initialized DiffProcessor with %s mode', format)
- if format == 'gitdiff':
+ log.debug('Initialized DiffProcessor with %s mode', diff_format)
+ self.differ = self._highlight_line_udiff
+ self._parser = self._new_parse_gitdiff
+
+ if diff_format == 'gitdiff':
self.differ = self._highlight_line_difflib
self._parser = self._parse_gitdiff
- else:
- self.differ = self._highlight_line_udiff
- self._parser = self._new_parse_gitdiff
+ raise DeprecationWarning('gitdiff usage is deprecated')
def _copy_iterator(self):
"""
@@ -190,33 +200,33 @@ class DiffProcessor(object):
self.__udiff, iterator_copy = tee(self.__udiff)
return iterator_copy
- def _escaper(self, string):
+ def _escaper(self, diff_string):
"""
Escaper for diff escapes special chars and checks the diff limit
:param string:
"""
- self.cur_diff_size += len(string)
+ self.cur_diff_size += len(diff_string)
if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
raise DiffLimitExceeded('Diff Limit Exceeded')
- return string \
- .replace('&', '&')\
- .replace('<', '<')\
- .replace('>', '>')
+ return diff_string \
+ .replace(b'&', b'&')\
+ .replace(b'<', b'<')\
+ .replace(b'>', b'>')
- def _line_counter(self, l):
+ def _line_counter(self, diff_line):
"""
Checks each line and bumps total adds/removes for this diff
- :param l:
+ :param diff_line:
"""
- if l.startswith('+') and not l.startswith('+++'):
+ if diff_line.startswith(b'+') and not diff_line.startswith(b'+++'):
self.adds += 1
- elif l.startswith('-') and not l.startswith('---'):
+ elif diff_line.startswith(b'-') and not diff_line.startswith(b'---'):
self.removes += 1
- return safe_unicode(l)
+ return diff_line
def _highlight_line_difflib(self, line, next_):
"""
@@ -238,9 +248,9 @@ class DiffProcessor(object):
newfrag = ''.join(newwords[j1:j2])
if tag != 'equal':
if oldfrag:
- oldfrag = '%s' % oldfrag
+ oldfrag = f'{oldfrag}'
if newfrag:
- newfrag = '%s' % newfrag
+ newfrag = f'{newfrag}'
oldfragments.append(oldfrag)
newfragments.append(newfrag)
@@ -267,17 +277,11 @@ class DiffProcessor(object):
tag = 'ins'
else:
tag = 'del'
- l['line'] = '%s<%s>%s%s>%s' % (
- l['line'][:start],
- tag,
- l['line'][start:last],
- tag,
- l['line'][last:]
- )
+ l['line'] = f"{l['line'][:start]}<{tag}>{l['line'][start:last]}{tag}>{l['line'][last:]}"
do(line)
do(next_)
- def _clean_line(self, line, command):
+ def _clean_line(self, line, command: str):
if command in ['+', '-', ' ']:
# only modify the line if it's actually a diff thing
line = line[1:]
@@ -285,7 +289,9 @@ class DiffProcessor(object):
def _parse_gitdiff(self, inline_diff=True):
_files = []
- diff_container = lambda arg: arg
+
+ def diff_container(arg):
+ return arg
for chunk in self._diff.chunks():
head = chunk.header
@@ -311,30 +317,24 @@ class DiffProcessor(object):
elif head['new_file_mode']:
op = OPS.ADD
stats['binary'] = True
- stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
- else: # modify operation, can be copy, rename or chmod
+ stats['ops'][NEW_FILENODE] = f"new file {safe_str(head['new_file_mode'])}"
+ else: # modify operation, can be: copy, rename or chmod
# CHMOD
if head['new_mode'] and head['old_mode']:
op = OPS.MOD
stats['binary'] = True
- stats['ops'][CHMOD_FILENODE] = (
- 'modified file chmod %s => %s' % (
- head['old_mode'], head['new_mode']))
+ stats['ops'][CHMOD_FILENODE] = f"modified file chmod {safe_str(head['old_mode'])} => {safe_str(head['new_mode'])}"
# RENAME
if head['rename_from'] != head['rename_to']:
op = OPS.MOD
stats['binary'] = True
- stats['ops'][RENAMED_FILENODE] = (
- 'file renamed from %s to %s' % (
- head['rename_from'], head['rename_to']))
+ stats['ops'][RENAMED_FILENODE] = f"file renamed from {safe_str(head['rename_from'])} to {safe_str(head['rename_to'])}"
# COPY
if head.get('copy_from') and head.get('copy_to'):
op = OPS.MOD
stats['binary'] = True
- stats['ops'][COPIED_FILENODE] = (
- 'file copied from %s to %s' % (
- head['copy_from'], head['copy_to']))
+ stats['ops'][COPIED_FILENODE] = f"file copied from {safe_str(head['copy_from'])} to {safe_str(head['copy_to'])}"
# If our new parsed headers didn't match anything fallback to
# old style detection
@@ -376,9 +376,8 @@ class DiffProcessor(object):
raise DiffLimitExceeded('File Limit Exceeded')
except DiffLimitExceeded:
- diff_container = lambda _diff: \
- LimitedDiffContainer(
- self.diff_limit, self.cur_diff_size, _diff)
+ def diff_container(_diff):
+ return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff)
exceeds_limit = len(raw_diff) > self.file_limit
limited_diff = True
@@ -387,7 +386,7 @@ class DiffProcessor(object):
else: # GIT format binary patch, or possibly empty diff
if head['bin_patch']:
# we have operation already extracted, but we mark simply
- # it's a diff we wont show for binary files
+ # it's a diff we won't show for binary files
stats['ops'][BIN_FILENODE] = 'binary diff hidden'
chunks = []
@@ -397,31 +396,33 @@ class DiffProcessor(object):
# to see the content of the file
chunks = []
- chunks.insert(0, [{
- 'old_lineno': '',
- 'new_lineno': '',
- 'action': Action.CONTEXT,
- 'line': msg,
- } for _op, msg in stats['ops'].items()
- if _op not in [MOD_FILENODE]])
+ frag = [{
+ 'old_lineno': '',
+ 'new_lineno': '',
+ 'action': Action.CONTEXT,
+ 'line': msg,
+ } for _op, msg in list(stats['ops'].items())
+ if _op not in [MOD_FILENODE]]
+
+ chunks.insert(0, frag)
_files.append({
- 'filename': safe_unicode(head['b_path']),
+ 'filename': safe_str(head['b_path']),
'old_revision': head['a_blob_id'],
'new_revision': head['b_blob_id'],
'chunks': chunks,
- 'raw_diff': safe_unicode(raw_diff),
+ 'raw_diff': safe_str(raw_diff),
'operation': op,
'stats': stats,
'exceeds_limit': exceeds_limit,
'is_limited_diff': limited_diff,
})
- sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
- OPS.DEL: 2}.get(info['operation'])
+ def operation_sorter(info):
+ return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation'])
if not inline_diff:
- return diff_container(sorted(_files, key=sorter))
+ return diff_container(sorted(_files, key=operation_sorter))
# highlight inline changes
for diff_data in _files:
@@ -440,24 +441,25 @@ class DiffProcessor(object):
except StopIteration:
pass
- return diff_container(sorted(_files, key=sorter))
+ return diff_container(sorted(_files, key=operation_sorter))
def _check_large_diff(self):
if self.diff_limit:
log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)
if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
- raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
+ raise DiffLimitExceeded(f'Diff Limit `{self.diff_limit}` Exceeded')
# FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
def _new_parse_gitdiff(self, inline_diff=True):
_files = []
- # this can be overriden later to a LimitedDiffContainer type
- diff_container = lambda arg: arg
+ # this can be overridden later to a LimitedDiffContainer type
+ def diff_container(arg):
+ return arg
for chunk in self._diff.chunks():
- head = chunk.header
- log.debug('parsing diff %r', head)
+ head = chunk.header_as_str
+ log.debug('parsing diff chunk %r', chunk)
raw_diff = chunk.raw
limited_diff = False
@@ -468,8 +470,8 @@ class DiffProcessor(object):
'added': 0,
'deleted': 0,
'binary': False,
- 'old_mode': None,
- 'new_mode': None,
+ 'old_mode': '',
+ 'new_mode': '',
'ops': {},
}
if head['old_mode']:
@@ -489,36 +491,30 @@ class DiffProcessor(object):
elif head['new_file_mode']:
op = OPS.ADD
stats['binary'] = True
- stats['old_mode'] = None
+ stats['old_mode'] = ''
stats['new_mode'] = head['new_file_mode']
- stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
+ stats['ops'][NEW_FILENODE] = f"new file {head['new_file_mode']}"
- # modify operation, can be copy, rename or chmod
+ # modify operation, can be: copy, rename or chmod
else:
# CHMOD
if head['new_mode'] and head['old_mode']:
op = OPS.MOD
stats['binary'] = True
- stats['ops'][CHMOD_FILENODE] = (
- 'modified file chmod %s => %s' % (
- head['old_mode'], head['new_mode']))
+ stats['ops'][CHMOD_FILENODE] = f"modified file chmod {head['old_mode']} => {head['new_mode']}"
# RENAME
if head['rename_from'] != head['rename_to']:
op = OPS.MOD
stats['binary'] = True
stats['renamed'] = (head['rename_from'], head['rename_to'])
- stats['ops'][RENAMED_FILENODE] = (
- 'file renamed from %s to %s' % (
- head['rename_from'], head['rename_to']))
+ stats['ops'][RENAMED_FILENODE] = f"file renamed from {head['rename_from']} to {head['rename_to']}"
# COPY
if head.get('copy_from') and head.get('copy_to'):
op = OPS.MOD
stats['binary'] = True
stats['copied'] = (head['copy_from'], head['copy_to'])
- stats['ops'][COPIED_FILENODE] = (
- 'file copied from %s to %s' % (
- head['copy_from'], head['copy_to']))
+ stats['ops'][COPIED_FILENODE] = f"file copied from {head['copy_from']} to {head['copy_to']}"
# If our new parsed headers didn't match anything fallback to
# old style detection
@@ -558,9 +554,8 @@ class DiffProcessor(object):
# but the browser is the bottleneck.
if not self.show_full_diff and exceeds_limit:
log.debug('File `%s` exceeds current file_limit of %s',
- safe_unicode(head['b_path']), self.file_limit)
- raise DiffLimitExceeded(
- 'File Limit %s Exceeded', self.file_limit)
+ head['b_path'], self.file_limit)
+ raise DiffLimitExceeded(f'File Limit {self.file_limit} Exceeded')
self._check_large_diff()
@@ -573,9 +568,11 @@ class DiffProcessor(object):
stats['ops'][MOD_FILENODE] = 'modified file'
except DiffLimitExceeded:
- diff_container = lambda _diff: \
- LimitedDiffContainer(
- self.diff_limit, self.cur_diff_size, _diff)
+ def limited_diff_container(_diff):
+ return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff)
+
+ # rebind the container wrapper so the final result is wrapped in a LimitedDiffContainer
+ diff_container = limited_diff_container
limited_diff = True
chunks = []
@@ -583,7 +580,7 @@ class DiffProcessor(object):
else: # GIT format binary patch, or possibly empty diff
if head['bin_patch']:
# we have operation already extracted, but we mark simply
- # it's a diff we wont show for binary files
+ # it's a diff we won't show for binary files
stats['ops'][BIN_FILENODE] = 'binary diff hidden'
chunks = []
@@ -594,31 +591,32 @@ class DiffProcessor(object):
# to see the content of the file
chunks = []
- chunks.insert(
- 0, [{'old_lineno': '',
- 'new_lineno': '',
- 'action': Action.CONTEXT,
- 'line': msg,
- } for _op, msg in stats['ops'].items()
- if _op not in [MOD_FILENODE]])
+ frag = [
+ {'old_lineno': '',
+ 'new_lineno': '',
+ 'action': Action.CONTEXT,
+ 'line': msg,
+ } for _op, msg in list(stats['ops'].items())
+ if _op not in [MOD_FILENODE]]
- original_filename = safe_unicode(head['a_path'])
+ chunks.insert(0, frag)
+
+ original_filename = safe_str(head['a_path'])
_files.append({
'original_filename': original_filename,
- 'filename': safe_unicode(head['b_path']),
+ 'filename': safe_str(head['b_path']),
'old_revision': head['a_blob_id'],
'new_revision': head['b_blob_id'],
'chunks': chunks,
- 'raw_diff': safe_unicode(raw_diff),
+ 'raw_diff': safe_str(raw_diff),
'operation': op,
'stats': stats,
'exceeds_limit': exceeds_limit,
'is_limited_diff': limited_diff,
})
- sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
- OPS.DEL: 2}.get(info['operation'])
-
+ def sorter(info):
+ return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation'])
return diff_container(sorted(_files, key=sorter))
# FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
@@ -667,20 +665,20 @@ class DiffProcessor(object):
line = next(diff_iter)
while old_line < old_end or new_line < new_end:
- command = ' '
+ command = b' '
if line:
command = line[0]
affects_old = affects_new = False
# ignore those if we don't expect them
- if command in '#@':
+ if command in b'#@':
continue
- elif command == '+':
+ elif command == b'+':
affects_new = True
action = Action.ADD
stats[0] += 1
- elif command == '-':
+ elif command == b'-':
affects_old = True
action = Action.DELETE
stats[1] += 1
@@ -692,8 +690,8 @@ class DiffProcessor(object):
old_line += affects_old
new_line += affects_new
lines.append({
- 'old_lineno': affects_old and old_line or '',
- 'new_lineno': affects_new and new_line or '',
+ 'old_lineno': affects_old and old_line or b'',
+ 'new_lineno': affects_new and new_line or b'',
'action': action,
'line': self._clean_line(line, command)
})
@@ -727,6 +725,7 @@ class DiffProcessor(object):
try:
line = next(diff_iter)
+ assert isinstance(line, bytes)
while line:
raw_diff.append(line)
@@ -737,6 +736,7 @@ class DiffProcessor(object):
break
gr = match.groups()
+
(old_line, old_end,
new_line, new_end) = [int(x or 1) for x in gr[:-1]]
@@ -754,7 +754,7 @@ class DiffProcessor(object):
old_line -= 1
new_line -= 1
- context = len(gr) == 5
+ len(gr) == 5
old_end += old_line
new_end += new_line
@@ -763,7 +763,8 @@ class DiffProcessor(object):
while old_line < old_end or new_line < new_end:
command = ' '
if line:
- command = line[0]
+ # Indexing a bytes line yields an int, so convert it to a one-character str
+ command: str = chr(line[0])
affects_old = affects_new = False
@@ -786,8 +787,8 @@ class DiffProcessor(object):
old_line += affects_old
new_line += affects_new
lines.append({
- 'old_lineno': affects_old and old_line or '',
- 'new_lineno': affects_new and new_line or '',
+ 'old_lineno': affects_old and old_line or None,
+ 'new_lineno': affects_new and new_line or None,
'action': action,
'line': self._clean_line(line, command)
})
@@ -815,7 +816,7 @@ class DiffProcessor(object):
except StopIteration:
pass
- return ''.join(raw_diff), chunks, stats
+ return b''.join(raw_diff), chunks, stats
def _safe_id(self, idstring):
"""Make a string safe for including in an id attribute.
@@ -833,24 +834,24 @@ class DiffProcessor(object):
"""
# Transform all whitespace to underscore
- idstring = re.sub(r'\s', "_", '%s' % idstring)
+ idstring = re.sub(r'\s', "_", f'{idstring}')
# Remove everything that is not a hyphen or a member of \w
idstring = re.sub(r'(?!-)\W', "", idstring).lower()
return idstring
@classmethod
- def diff_splitter(cls, string):
+ def diff_splitter(cls, diff_string: bytes):
"""
Diff split that emulates .splitlines() but works only on \n
"""
- if not string:
+ if not diff_string:
return
- elif string == '\n':
- yield '\n'
+ elif diff_string == b'\n':
+ yield b'\n'
else:
- has_newline = string.endswith('\n')
- elements = string.split('\n')
+ has_newline = diff_string.endswith(b'\n')
+ elements = diff_string.split(b'\n')
if has_newline:
# skip last element as it's empty string from newlines
elements = elements[:-1]
@@ -860,9 +861,9 @@ class DiffProcessor(object):
for cnt, line in enumerate(elements, start=1):
last_line = cnt == len_elements
if last_line and not has_newline:
- yield safe_unicode(line)
+ yield line
else:
- yield safe_unicode(line) + '\n'
+ yield line + b'\n'
def prepare(self, inline_diff=True):
"""
@@ -879,132 +880,7 @@ class DiffProcessor(object):
"""
Returns raw diff as a byte string
"""
- return self._diff.raw
-
- def as_html(self, table_class='code-difftable', line_class='line',
- old_lineno_class='lineno old', new_lineno_class='lineno new',
- code_class='code', enable_comments=False, parsed_lines=None):
- """
- Return given diff as html table with customized css classes
- """
- # TODO(marcink): not sure how to pass in translator
- # here in an efficient way, leave the _ for proper gettext extraction
- _ = lambda s: s
-
- def _link_to_if(condition, label, url):
- """
- Generates a link if condition is meet or just the label if not.
- """
-
- if condition:
- return '''%(label)s''' % {
- 'title': _('Click to select line'),
- 'url': url,
- 'label': label
- }
- else:
- return label
- if not self.parsed:
- self.prepare()
-
- diff_lines = self.parsed_diff
- if parsed_lines:
- diff_lines = parsed_lines
-
- _html_empty = True
- _html = []
- _html.append('''