svn_diff.py
212 lines
| 8.2 KiB
| text/x-python
|
PythonLexer
/ vcsserver / svn_diff.py
r0 | # | |||
# Copyright (C) 2004-2009 Edgewall Software | ||||
# Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de> | ||||
r1126 | # Copyright (C) 2014-2023 RhodeCode GmbH | |||
r0 | # All rights reserved. | |||
# | ||||
# This software is licensed as described in the file COPYING, which | ||||
# you should have received as part of this distribution. The terms | ||||
# are also available at http://trac.edgewall.org/wiki/TracLicense. | ||||
# | ||||
# This software consists of voluntary contributions made by many | ||||
# individuals. For the exact contribution history, see the revision | ||||
# history and logs, available at http://trac.edgewall.org/log/. | ||||
# | ||||
# Author: Christopher Lenz <cmlenz@gmx.de> | ||||
import difflib
from typing import Iterator
def get_filtered_hunks(from_lines, to_lines, context=None,
                       ignore_blank_lines: bool = False, ignore_case: bool = False,
                       ignore_space_changes: bool = False):
    """Compute `difflib.SequenceMatcher` opcodes for two line lists, grouped
    according to ``context`` and filtered according to the ``ignore_*`` flags.

    :param from_lines: list of lines corresponding to the old content
    :param to_lines: list of lines corresponding to the new content
    :param context: the number of "equal" lines kept around each change;
                    ``None`` means a single group holding every opcode
    :param ignore_blank_lines: differences about empty lines only are ignored
    :param ignore_case: upper case / lower case only differences are ignored
    :param ignore_space_changes: differences in amount of spaces are ignored
    :return: iterator over groups (lists) of opcode tuples
             ``(tag, i1, i2, j1, j2)``

    When none of the ``ignore_*`` flags is set there is nothing to filter
    out, so the groups are returned exactly as `get_hunks` produced them.
    """
    grouped = get_hunks(from_lines, to_lines, context)
    if not (ignore_blank_lines or ignore_case or ignore_space_changes):
        return grouped
    return filter_ignorable_lines(
        grouped, from_lines, to_lines, context,
        ignore_blank_lines, ignore_case, ignore_space_changes)
def get_hunks(from_lines, to_lines, context=None):
    """Return an iterator over grouped opcodes describing the differences.

    With ``context=None`` the iterator produces exactly one group holding
    the full opcode list of the matcher; otherwise the grouping is delegated
    to `difflib.SequenceMatcher.get_grouped_opcodes`.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    seq_matcher = difflib.SequenceMatcher(isjunk=None, a=from_lines, b=to_lines)
    if context is not None:
        return seq_matcher.get_grouped_opcodes(context)
    # Wrap the single, ungrouped opcode list so callers can always iterate
    # over "groups", whichever mode was requested.
    return (opcodes for opcodes in (seq_matcher.get_opcodes(),))
def filter_ignorable_lines(hunks, from_lines, to_lines, context,
                           ignore_blank_lines, ignore_case,
                           ignore_space_changes):
    """Detect line changes that should be ignored and emit them tagged as
    "equal", possibly joined with the preceding and/or following "equal"
    block.

    See `get_filtered_hunks` for the parameter descriptions.

    :param hunks: iterable of opcode groups as produced by `get_hunks`
    :return: generator of opcode groups. If nothing was ignorable the input
             groups are yielded unchanged; otherwise the opcodes are merged
             and, unless ``context`` is ``None``, regrouped so that at most
             ``context`` "equal" lines surround every change.

    The lines are expected to be ``bytes`` objects.
    """
    def is_blank(lines):
        # Lines may still carry their line terminator; a line made only of
        # terminator characters is an empty line as well.
        return all(line.rstrip(b'\r\n') == b'' for line in lines)

    def normalize(line):
        if ignore_case:
            line = line.lower()
        if ignore_space_changes:
            line = b' '.join(line.split())
        return line

    def is_ignorable(tag, fromlines, tolines):
        if tag == 'delete':
            return ignore_blank_lines and is_blank(fromlines)
        if tag == 'insert':
            return ignore_blank_lines and is_blank(tolines)
        if tag == 'replace' and (ignore_case or ignore_space_changes):
            # Only a line-by-line replacement can be a pure case/space change.
            if len(fromlines) != len(tolines):
                return False
            return all(normalize(old) == normalize(new)
                       for old, new in zip(fromlines, tolines))
        return False

    # The groups are walked here and possibly yielded again at the end.
    hunks = list(hunks)
    opcodes = []
    ignored_lines = False
    prev = None  # pending "equal" opcode that may still be extended
    for hunk in hunks:
        for tag, i1, i2, j1, j2 in hunk:
            ignorable = tag != 'equal' and is_ignorable(
                tag, from_lines[i1:i2], to_lines[j1:j2])
            if tag == 'equal' or ignorable:
                if ignorable:
                    ignored_lines = True
                if prev is not None:
                    # Extend the pending "equal" block instead of opening
                    # a new one.
                    i1, j1 = prev[1], prev[3]
                prev = ('equal', i1, i2, j1, j2)
                continue
            if prev is not None:
                opcodes.append(prev)
                prev = None
            opcodes.append((tag, i1, i2, j1, j2))
    if prev is not None:
        opcodes.append(prev)

    if not ignored_lines:
        # Nothing was filtered: hand back the original grouping untouched.
        yield from hunks
        return

    if context is None:
        yield opcodes
        return

    # we leave at most n lines with the tag 'equal' before and after
    # every change
    n = context
    nn = n + n
    group = []

    def all_equal():
        return all(op[0] == 'equal' for op in group)

    for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
        if idx == 0 and tag == 'equal':  # Fixup leading unchanged block
            i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
        elif tag == 'equal' and i2 - i1 > nn:
            # A long unchanged run ends the current group after n lines
            # and starts the next group with its last n lines.
            group.append((tag, i1, min(i2, i1 + n), j1,
                          min(j2, j1 + n)))
            if not all_equal():
                yield group
            group = []
            i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
        group.append((tag, i1, i2, j1, j2))

    if group and not (len(group) == 1 and group[0][0] == 'equal'):
        if group[-1][0] == 'equal':  # Fixup trailing unchanged block
            tag, i1, i2, j1, j2 = group[-1]
            group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
        if not all_equal():
            yield group
# Default terminator expected at the end of every line and appended to
# emitted diff lines that lack it.
LINE_TERM = b'\n'
# Marker line emitted after a diff line whose source line did not end with
# the line terminator.
NO_NEWLINE_AT_END = b'\\ No newline at end of file'
r0 | ||||
def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
                 ignore_case: bool = False, ignore_space_changes: bool = False,
                 lineterm=LINE_TERM) -> Iterator[bytes]:
    """
    Generator producing lines corresponding to a textual diff.

    See `get_filtered_hunks` for the parameter descriptions.

    :param lineterm: terminator every line is expected to end with; a line
                     lacking it is emitted with the terminator appended and
                     followed by the "no newline at end of file" marker
    :return: generator of ``bytes`` lines: one ``@@ -a,b +c,d @@`` header
             per hunk followed by its `` ``/``-``/``+`` prefixed lines
    """
    # TODO: johbo: Check if this can be nicely integrated into the matching
    match_from, match_to = from_lines, to_lines
    if ignore_space_changes:
        # Match on stripped copies only. The emitted lines below come from
        # the untouched input, so they keep their indentation and their line
        # terminator (emitting the stripped copies would flag every line as
        # lacking a trailing newline).
        from_lines, to_lines = list(from_lines), list(to_lines)
        match_from = [line.strip() for line in from_lines]
        match_to = [line.strip() for line in to_lines]

    def _hunk_range(start, length) -> bytes:
        if length != 1:
            return b'%d,%d' % (start, length)
        return b'%d' % (start,)

    def _emit(prefix, lines):
        # Yield each line with its diff prefix, marking unterminated lines.
        for line in lines:
            if line.endswith(lineterm):
                yield prefix + line
            else:
                yield prefix + line + lineterm
                yield NO_NEWLINE_AT_END + lineterm

    for group in get_filtered_hunks(match_from, match_to, context,
                                    ignore_blank_lines, ignore_case,
                                    ignore_space_changes):
        if not group:
            # Two empty inputs with context=None give an empty opcode list;
            # there is no hunk to describe.
            continue
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        if i1 == 0 and i2 == 0:
            i1, i2 = -1, -1  # support for Add changes
        if j1 == 0 and j2 == 0:
            j1, j2 = -1, -1  # support for Delete changes
        yield b'@@ -%b +%b @@%b' % (
            _hunk_range(i1 + 1, i2 - i1),
            _hunk_range(j1 + 1, j2 - j1),
            lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                yield from _emit(b' ', from_lines[i1:i2])
                continue
            if tag in ('replace', 'delete'):
                yield from _emit(b'-', from_lines[i1:i2])
            if tag in ('replace', 'insert'):
                yield from _emit(b'+', to_lines[j1:j2])