#
# Copyright (C) 2004-2009 Edgewall Software
# Copyright (C) 2004-2006 Christopher Lenz
# Copyright (C) 2014-2023 RhodeCode GmbH
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://trac.edgewall.org/log/.
#
# Author: Christopher Lenz

import difflib
from typing import Iterator


def get_filtered_hunks(from_lines, to_lines, context=None,
                       ignore_blank_lines: bool = False, ignore_case: bool = False,
                       ignore_space_changes: bool = False):
    """Retrieve differences in the form of `difflib.SequenceMatcher`
    opcodes, grouped according to the ``context`` and ``ignore_*``
    parameters.

    :param from_lines: list of lines corresponding to the old content
    :param to_lines: list of lines corresponding to the new content
    :param ignore_blank_lines: differences about empty lines only are ignored
    :param ignore_case: upper case / lower case only differences are ignored
    :param ignore_space_changes: differences in amount of spaces are ignored
    :param context: the number of "equal" lines kept for representing
                    the context of the change; ``None`` means a single
                    group holding every opcode
    :return: generator of grouped `difflib.SequenceMatcher` opcodes

    If none of the ``ignore_*`` parameters is `True`, there's nothing
    to filter out and the results come straight from the SequenceMatcher.
    """
    hunks = get_hunks(from_lines, to_lines, context)
    if ignore_space_changes or ignore_case or ignore_blank_lines:
        hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context,
                                       ignore_blank_lines, ignore_case,
                                       ignore_space_changes)
    return hunks


def get_hunks(from_lines, to_lines, context=None):
    """Return an iterator over grouped opcodes describing the differences.

    With ``context=None`` exactly one group containing all opcodes of the
    matcher is produced; otherwise the grouping is delegated to
    `SequenceMatcher.get_grouped_opcodes`.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    matcher = difflib.SequenceMatcher(None, from_lines, to_lines)
    if context is None:
        return (hunk for hunk in [matcher.get_opcodes()])
    else:
        return matcher.get_grouped_opcodes(context)


def filter_ignorable_lines(hunks, from_lines, to_lines, context,
                           ignore_blank_lines, ignore_case,
                           ignore_space_changes):
    """Detect line changes that should be ignored and emit them tagged as
    "equal", possibly joined with the preceding and/or following "equal"
    block.

    If nothing turned out to be ignorable, the incoming ``hunks`` are
    yielded unchanged. Otherwise the opcodes are regrouped: as one single
    group when ``context`` is ``None``, or into groups keeping at most
    ``context`` "equal" lines around every change.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    def is_ignorable(tag, fromlines, tolines):
        # Pure deletions/insertions are ignorable only when every affected
        # line is the empty byte string.
        if tag == 'delete' and ignore_blank_lines:
            if b''.join(fromlines) == b'':
                return True
        elif tag == 'insert' and ignore_blank_lines:
            if b''.join(tolines) == b'':
                return True
        elif tag == 'replace' and (ignore_case or ignore_space_changes):
            # Lines are compared pairwise, so both sides need the same size.
            if len(fromlines) != len(tolines):
                return False

            def f(input_str):
                if ignore_case:
                    input_str = input_str.lower()
                if ignore_space_changes:
                    input_str = b' '.join(input_str.split())
                return input_str

            return all(f(old) == f(new)
                       for old, new in zip(fromlines, tolines))
        return False

    hunks = list(hunks)
    opcodes = []
    ignored_lines = False
    # ``prev`` accumulates a pending "equal" opcode; it keeps growing across
    # ignorable changes (and across hunk boundaries) until a real change
    # forces it to be flushed.
    prev = None
    for hunk in hunks:
        for tag, i1, i2, j1, j2 in hunk:
            if tag == 'equal':
                if prev:
                    prev = (tag, prev[1], i2, prev[3], j2)
                else:
                    prev = (tag, i1, i2, j1, j2)
            else:
                if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
                    ignored_lines = True
                    if prev:
                        prev = 'equal', prev[1], i2, prev[3], j2
                    else:
                        prev = 'equal', i1, i2, j1, j2
                    continue
                if prev:
                    opcodes.append(prev)
                opcodes.append((tag, i1, i2, j1, j2))
                prev = None
    if prev:
        opcodes.append(prev)

    if ignored_lines:
        if context is None:
            yield opcodes
        else:
            # we leave at most n lines with the tag 'equal' before and after
            # every change
            n = context
            nn = n + n

            group = []

            def all_equal():
                # The original helper dropped the result of ``all`` and thus
                # always returned None; return it so the guard is effective.
                return all(op[0] == 'equal' for op in group)

            for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
                if idx == 0 and tag == 'equal':
                    # Fixup leading unchanged block
                    i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
                elif tag == 'equal' and i2 - i1 > nn:
                    # A long unchanged run closes the current group with its
                    # first n lines and opens the next one with its last n.
                    group.append((tag, i1, min(i2, i1 + n), j1,
                                  min(j2, j1 + n)))
                    if not all_equal():
                        yield group
                    group = []
                    i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
                group.append((tag, i1, i2, j1, j2))

            if group and not (len(group) == 1 and group[0][0] == 'equal'):
                if group[-1][0] == 'equal':
                    # Fixup trailing unchanged block
                    tag, i1, i2, j1, j2 = group[-1]
                    group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
                if not all_equal():
                    yield group
    else:
        yield from hunks


NO_NEWLINE_AT_END = b'\\ No newline at end of file'
LINE_TERM = b'\n'


def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
                 ignore_case: bool = False, ignore_space_changes: bool = False,
                 lineterm=LINE_TERM) -> Iterator[bytes]:
    """
    Generator producing lines corresponding to a textual diff.

    Every hunk starts with an ``@@ -a,b +c,d @@`` header followed by the
    hunk lines prefixed with ``b' '``, ``b'-'`` or ``b'+'``. A line that
    lacks ``lineterm`` gets it appended and is followed by the
    ``NO_NEWLINE_AT_END`` marker.

    When ``ignore_space_changes`` is set, lines are matched *and emitted*
    in their stripped form; the marker is still decided from the original,
    unstripped line so that stripping does not fake a missing newline.

    :param lineterm: line terminator (bytes) expected at the end of lines

    See `get_filtered_hunks` for the remaining parameter descriptions.
    """
    # Remember the untouched input: stripping below also removes the line
    # terminator, which must not influence the "no newline" marker.
    orig_from_lines, orig_to_lines = from_lines, to_lines

    # TODO: johbo: Check if this can be nicely integrated into the matching
    if ignore_space_changes:
        from_lines = [l.strip() for l in from_lines]
        to_lines = [l.strip() for l in to_lines]

    def _hunk_range(start, length) -> bytes:
        if length != 1:
            return b'%d,%d' % (start, length)
        else:
            return b'%d' % (start,)

    def _emit(prefix, lines, originals, start, stop):
        # Yield lines[start:stop] with ``prefix``, terminating them as needed.
        for line, original in zip(lines[start:stop], originals[start:stop]):
            if line.endswith(lineterm):
                yield prefix + line
                continue
            yield prefix + line + lineterm
            if not original.endswith(lineterm):
                yield NO_NEWLINE_AT_END + lineterm

    for group in get_filtered_hunks(from_lines, to_lines, context,
                                    ignore_blank_lines, ignore_case,
                                    ignore_space_changes):
        if not group:
            # Two empty inputs with context=None produce an empty opcode
            # list; there is nothing to render for it.
            continue
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        if i1 == 0 and i2 == 0:
            i1, i2 = -1, -1  # support for Add changes
        if j1 == 0 and j2 == 0:
            j1, j2 = -1, -1  # support for Delete changes
        yield b'@@ -%b +%b @@%b' % (
            _hunk_range(i1 + 1, i2 - i1),
            _hunk_range(j1 + 1, j2 - j1),
            lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                yield from _emit(b' ', from_lines, orig_from_lines, i1, i2)
                continue
            if tag in ('replace', 'delete'):
                yield from _emit(b'-', from_lines, orig_from_lines, i1, i2)
            if tag in ('replace', 'insert'):
                yield from _emit(b'+', to_lines, orig_to_lines, j1, j2)