|
|
#
|
|
|
# Copyright (C) 2004-2009 Edgewall Software
|
|
|
# Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
|
|
|
# Copyright (C) 2014-2023 RhodeCode GmbH
|
|
|
# All rights reserved.
|
|
|
#
|
|
|
# This software is licensed as described in the file COPYING, which
|
|
|
# you should have received as part of this distribution. The terms
|
|
|
# are also available at http://trac.edgewall.org/wiki/TracLicense.
|
|
|
#
|
|
|
# This software consists of voluntary contributions made by many
|
|
|
# individuals. For the exact contribution history, see the revision
|
|
|
# history and logs, available at http://trac.edgewall.org/log/.
|
|
|
#
|
|
|
# Author: Christopher Lenz <cmlenz@gmx.de>
|
|
|
|
|
|
import difflib
from typing import Iterator
|
|
|
|
|
|
|
|
|
def get_filtered_hunks(from_lines, to_lines, context=None,
                       ignore_blank_lines: bool = False, ignore_case: bool = False,
                       ignore_space_changes: bool = False):
    """Compute grouped `difflib.SequenceMatcher` opcodes for two line lists,
    optionally hiding changes covered by the ``ignore_*`` flags.

    :param from_lines: list of lines corresponding to the old content
    :param to_lines: list of lines corresponding to the new content
    :param context: the number of "equal" lines kept around each change;
                    passed through to `get_hunks` and `filter_ignorable_lines`
    :param ignore_blank_lines: differences about empty lines only are ignored
    :param ignore_case: upper case / lower case only differences are ignored
    :param ignore_space_changes: differences in amount of spaces are ignored
    :return: iterator over groups of `difflib.SequenceMatcher` opcodes

    When every ``ignore_*`` flag is false there is nothing to filter out,
    so the result of `get_hunks` is returned unchanged.
    """
    raw_hunks = get_hunks(from_lines, to_lines, context)

    filtering_requested = (
        ignore_space_changes or ignore_case or ignore_blank_lines)
    if not filtering_requested:
        return raw_hunks

    return filter_ignorable_lines(
        raw_hunks, from_lines, to_lines, context,
        ignore_blank_lines, ignore_case, ignore_space_changes)
|
|
|
|
|
|
|
|
|
def get_hunks(from_lines, to_lines, context=None):
    """Return an iterator of opcode groups describing the differences.

    With ``context`` set, the groups come from
    `difflib.SequenceMatcher.get_grouped_opcodes`. Without it, a single
    group holding the full `get_opcodes` list is produced.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    sequence_matcher = difflib.SequenceMatcher(None, from_lines, to_lines)

    if context is not None:
        return sequence_matcher.get_grouped_opcodes(context)

    # No context limit: expose all opcodes as one single hunk.
    all_opcodes = sequence_matcher.get_opcodes()
    return (single_hunk for single_hunk in (all_opcodes,))
|
|
|
|
|
|
|
|
|
def filter_ignorable_lines(hunks, from_lines, to_lines, context,
                           ignore_blank_lines, ignore_case,
                           ignore_space_changes):
    """Detect line changes that should be ignored and emit them tagged as
    "equal", joined with the preceding and/or following "equal" block.

    This is a generator. If no change turned out to be ignorable, the
    original ``hunks`` are yielded unchanged. Otherwise the merged opcodes
    are yielded as a single group (``context is None``) or re-grouped so
    that at most ``context`` "equal" lines surround every change.

    Lines are expected to be `bytes`.

    :param hunks: iterable of opcode groups, each opcode being a
                  ``(tag, i1, i2, j1, j2)`` tuple
    :return: generator of opcode groups (lists of opcode tuples)

    See `get_filtered_hunks` for the remaining parameter descriptions.
    """
    def normalize(line):
        # Bring a line into the canonical form used for comparison.
        if ignore_case:
            line = line.lower()
        if ignore_space_changes:
            line = b' '.join(line.split())
        return line

    def only_blank(lines):
        # A line is blank when nothing but its line terminator remains;
        # lines may or may not carry their trailing newline.
        return all(not line.rstrip(b'\r\n') for line in lines)

    def is_ignorable(tag, fromlines, tolines):
        if tag == 'delete':
            return ignore_blank_lines and only_blank(fromlines)
        if tag == 'insert':
            return ignore_blank_lines and only_blank(tolines)
        if tag == 'replace' and (ignore_case or ignore_space_changes):
            if len(fromlines) != len(tolines):
                return False
            return all(normalize(old) == normalize(new)
                       for old, new in zip(fromlines, tolines))
        return False

    # The hunks are needed twice (merge pass and pass-through), so
    # materialize them in case a one-shot iterator was passed in.
    hunks = list(hunks)
    opcodes = []
    ignored_lines = False
    prev = None  # pending "equal" opcode that may still grow
    for hunk in hunks:
        for tag, i1, i2, j1, j2 in hunk:
            if tag == 'equal':
                if prev:
                    prev = (tag, prev[1], i2, prev[3], j2)
                else:
                    prev = (tag, i1, i2, j1, j2)
                continue

            if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
                # Fold the ignorable change into the pending "equal" block.
                ignored_lines = True
                if prev:
                    prev = 'equal', prev[1], i2, prev[3], j2
                else:
                    prev = 'equal', i1, i2, j1, j2
                continue

            # A real change: flush the pending "equal" block first.
            if prev:
                opcodes.append(prev)
            opcodes.append((tag, i1, i2, j1, j2))
            prev = None
    if prev:
        opcodes.append(prev)

    if not ignored_lines:
        # Nothing was filtered, hand the original hunks back untouched.
        yield from hunks
        return

    if context is None:
        yield opcodes
        return

    # we leave at most n lines with the tag 'equal' before and after
    # every change
    n = context
    nn = n + n

    group = []

    def all_equal():
        return all(op[0] == 'equal' for op in group)

    for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
        if idx == 0 and tag == 'equal':  # Fixup leading unchanged block
            i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
        elif tag == 'equal' and i2 - i1 > nn:
            # A long unchanged run splits two groups: its first n lines
            # close the current group, its last n lines open the next one.
            group.append((tag, i1, min(i2, i1 + n), j1,
                          min(j2, j1 + n)))
            if not all_equal():
                yield group
            group = []
            i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
        group.append((tag, i1, i2, j1, j2))

    if group and not (len(group) == 1 and group[0][0] == 'equal'):
        if group[-1][0] == 'equal':  # Fixup trailing unchanged block
            tag, i1, i2, j1, j2 = group[-1]
            group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
        if not all_equal():
            yield group
|
|
|
|
|
|
|
|
|
# Marker line emitted by `unified_diff` right after a diff line whose
# source line does not end with the line terminator.
NO_NEWLINE_AT_END = b'\\ No newline at end of file'
|
|
|
# Default value of the ``lineterm`` parameter of `unified_diff`.
LINE_TERM = b'\n'
|
|
|
|
|
|
|
|
|
def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
                 ignore_case: bool = False, ignore_space_changes: bool = False,
                 lineterm=LINE_TERM) -> Iterator[bytes]:
    """
    Generator producing lines corresponding to a textual diff.

    For every group of opcodes a ``@@ -start,len +start,len @@`` header is
    yielded, followed by the group's lines prefixed with ``b' '``, ``b'-'``
    or ``b'+'``. A line that does not end with ``lineterm`` gets the
    terminator appended and is followed by the `NO_NEWLINE_AT_END` marker.

    With ``ignore_space_changes`` the matching is done on stripped copies
    of the lines, while the emitted lines are always the original ones, so
    indentation and line terminators are preserved in the output.

    :param lineterm: line terminator (bytes) expected at the end of each line
    :return: generator of `bytes` diff lines

    See `get_filtered_hunks` for the other parameter descriptions.
    """
    # TODO: johbo: Check if this can be nicely integrated into the matching
    if ignore_space_changes:
        # Only the matcher sees the stripped lines. Stripping is per line,
        # so the opcode indices remain valid for the original lists.
        match_from = [line.strip() for line in from_lines]
        match_to = [line.strip() for line in to_lines]
    else:
        match_from, match_to = from_lines, to_lines

    def _hunk_range(start, length) -> bytes:
        # "start,length", abbreviated to "start" for one-line ranges.
        if length != 1:
            return b'%d,%d' % (start, length)
        return b'%d' % (start,)

    def _emit(prefix, lines):
        # Yield the given lines with their diff prefix, flagging lines
        # that lack the terminator.
        for line in lines:
            if line.endswith(lineterm):
                yield prefix + line
            else:
                yield prefix + line + lineterm
                yield NO_NEWLINE_AT_END + lineterm

    for group in get_filtered_hunks(match_from, match_to, context,
                                    ignore_blank_lines, ignore_case,
                                    ignore_space_changes):
        if not group:
            # Two empty inputs without context produce an empty group.
            continue

        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        # An empty range starting at 0 is rendered as "0,0".
        if i1 == 0 and i2 == 0:
            i1, i2 = -1, -1  # support for Add changes
        if j1 == 0 and j2 == 0:
            j1, j2 = -1, -1  # support for Delete changes

        yield b'@@ -%b +%b @@%b' % (
            _hunk_range(i1 + 1, i2 - i1),
            _hunk_range(j1 + 1, j2 - j1),
            lineterm)

        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                yield from _emit(b' ', from_lines[i1:i2])
                continue
            if tag in ('replace', 'delete'):
                yield from _emit(b'-', from_lines[i1:i2])
            if tag in ('replace', 'insert'):
                yield from _emit(b'+', to_lines[j1:j2])
|
|
|
|