mdiff.py
621 lines
| 19.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / mdiff.py
mpm@selenic.com
|
r239 | # mdiff.py - diff and patch routines for mercurial | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com> | ||
mpm@selenic.com
|
r239 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
mpm@selenic.com
|
r239 | |||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
Gregory Szorc
|
r27484 | |||
import re | ||||
import struct | ||||
Matt Harbison
|
r52829 | import typing | ||
Gregory Szorc
|
r27484 | import zlib | ||
Matt Harbison
|
r52829 | from typing import ( | ||
Iterable, | ||||
Iterator, | ||||
List, | ||||
Optional, | ||||
Sequence, | ||||
Tuple, | ||||
Union, | ||||
cast, | ||||
) | ||||
Gregory Szorc
|
r27484 | from .i18n import _ | ||
from . import ( | ||||
Rodrigo Damazio Bovendorp
|
r45714 | diffhelper, | ||
Yuya Nishihara
|
r36432 | encoding, | ||
Gregory Szorc
|
r27484 | error, | ||
Yuya Nishihara
|
r32369 | policy, | ||
Pulkit Goyal
|
r31631 | pycompat, | ||
Gregory Szorc
|
r27484 | util, | ||
) | ||||
Matt Harbison
|
r52826 | from .interfaces import ( | ||
modules as intmod, | ||||
) | ||||
Boris Feld
|
r36625 | from .utils import dateutil | ||
mpm@selenic.com
|
r0 | |||
Matt Harbison
|
# Obtain the bdiff and mpatch implementations through policy.importmod();
# which concrete module is returned is decided by the policy module.
bdiff: intmod.BDiff = policy.importmod('bdiff')
mpatch: intmod.MPatch = policy.importmod('mpatch')

# Module-level aliases for the bdiff/mpatch entry points, so callers can
# reach them as attributes of this module.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
splitnewlines = bdiff.splitnewlines
Vadim Gelfer
|
r2248 | |||
Matt Harbison
|
# Type aliases referenced only from annotations. They are defined under
# TYPE_CHECKING, so they do not exist at runtime; this relies on the
# module's ``from __future__ import annotations`` keeping annotations lazy.
if typing.TYPE_CHECKING:
    HunkLines = List[bytes]
    """Lines of a hunk- a header, followed by line additions and deletions."""

    HunkRange = Tuple[int, int, int, int]
    """HunkRange represents the range information of a hunk.

    The tuple (s1, l1, s2, l2) forms the header '@@ -s1,l1 +s2,l2 @@'."""

    Range = Tuple[int, int]
    """A (lowerbound, upperbound) range tuple."""

    TypedBlock = Tuple[intmod.BDiffBlock, bytes]
    """A bdiff block with its type."""
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r43784 | # TODO: this looks like it could be an attrs, which might help pytype | ||
Gregory Szorc
|
class diffopts:
    """Bundle of options controlling how diffs are computed and rendered.

    Each key of ``defaults`` becomes an instance attribute. Options passed
    as keyword arguments override the defaults; an option passed as None
    falls back to its default.

    context           the number of context lines
    text              treats all files as text
    showfunc          enables diff -p output
    git               enables the git extended patch format
    nodates           removes dates from diff headers
    nobinary          ignores binary files
    noprefix          disables the 'a/' and 'b/' prefixes (ignored in
                      plain mode)
    ignorews          ignores all whitespace changes in the diff
    ignorewsamount    ignores changes in the amount of whitespace
    ignorewseol       ignores whitespace changes at the end of lines
    ignoreblanklines  ignores changes whose lines are all blank
    upgrade           generates git diffs to avoid data loss
    xdiff             prefers bdiff.xdiffblocks when it is available

    See ``defaults`` for the complete list of recognized options.
    """

    _HAS_DYNAMIC_ATTRIBUTES = True

    defaults = {
        b'context': 3,
        b'text': False,
        b'showfunc': False,
        b'git': False,
        b'nodates': False,
        b'nobinary': False,
        b'noprefix': False,
        b'index': 0,
        b'ignorews': False,
        b'ignorewsamount': False,
        b'ignorewseol': False,
        b'ignoreblanklines': False,
        b'upgrade': False,
        b'showsimilarity': False,
        b'worddiff': False,
        b'xdiff': False,
    }

    def __init__(self, **opts):
        """Populate one attribute per known option.

        Raises error.InputError when ``context`` cannot be converted to an
        integer.
        """
        given = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            value = given.get(name)
            # A missing option and an explicit None both mean "default".
            if value is None:
                value = fallback
            setattr(self, pycompat.sysstr(name), value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.InputError(
                _(b'diff context lines count must be an integer, not %r')
                % pycompat.bytestr(self.context)
            )

    def copy(self, **kwargs):
        """Return a new diffopts with ``kwargs`` overriding current values."""
        current = pycompat.strkwargs(
            {k: getattr(self, pycompat.sysstr(k)) for k in self.defaults}
        )
        current.update(kwargs)
        return diffopts(**current)

    def __bytes__(self):
        """Render every option as ``name: value``, comma separated."""
        pairs = (
            b"%s: %r" % (k, getattr(self, pycompat.sysstr(k)))
            for k in self.defaults
        )
        return b", ".join(pairs)

    __str__ = encoding.strmethod(__bytes__)


# Shared instance used wherever a caller does not supply its own options.
defaultopts = diffopts()
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def wsclean(opts: diffopts, text: bytes, blank: bool = True) -> bytes:
    """Normalize ``text`` according to the whitespace options in ``opts``.

    * ``ignorews`` / ``ignorewsamount``: the text is passed through
      bdiff.fixws(), with its second argument True for ``ignorews`` and
      False for ``ignorewsamount`` (``ignorews`` wins if both are set).
    * ``ignoreblanklines`` (only when ``blank`` is true): runs of newlines
      are collapsed to one and leading/trailing newlines are removed.
    * ``ignorewseol``: spaces, tabs, CRs and form feeds directly before a
      newline are removed.

    The steps are applied in that order and the resulting bytes returned.
    """
    allws = opts.ignorews
    if allws or opts.ignorewsamount:
        text = bdiff.fixws(text, bool(allws))

    if blank and opts.ignoreblanklines:
        collapsed = re.sub(b'\n+', b'\n', text)
        text = collapsed.strip(b'\n')

    if opts.ignorewseol:
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)

    return text
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def splitblock(
    base1: int,
    lines1: Iterable[bytes],
    base2: int,
    lines2: Iterable[bytes],
    opts: diffopts,
) -> Iterable[TypedBlock]:
    """Split a block into alternating matching and blank sub-blocks.

    The two line sequences are expected to match except for interwoven
    blank lines (a line is blank when wsclean() reduces it to nothing).
    Yields ``((a1, a2, b1, b2), btype)`` items, offset by ``base1`` and
    ``base2``, where ``btype`` is b'=' for a run of matching non-blank
    lines and b'~' for a run of blank lines.

    NOTE(review): only side 1 is bounds-checked while consuming matching
    lines, so the precondition above must hold -- confirm callers respect
    it.
    """
    nonblank1 = [bool(wsclean(opts, line)) for line in lines1]
    nonblank2 = [bool(wsclean(opts, line)) for line in lines2]
    end1, end2 = len(nonblank1), len(nonblank2)
    pos1 = pos2 = 0
    while pos1 < end1 or pos2 < end2:
        nxt1, nxt2 = pos1, pos2
        at_blank = (
            nxt1 >= end1
            or not nonblank1[nxt1]
            or nxt2 >= end2
            or not nonblank2[nxt2]
        )
        if at_blank:
            # Consume the block of blank lines on both sides
            btype = b'~'
            while nxt1 < end1 and not nonblank1[nxt1]:
                nxt1 += 1
            while nxt2 < end2 and not nonblank2[nxt2]:
                nxt2 += 1
        else:
            # Consume the matching lines in lockstep
            btype = b'='
            while nxt1 < end1 and nonblank1[nxt1] and nonblank2[nxt2]:
                nxt1 += 1
                nxt2 += 1
        yield (base1 + pos1, base1 + nxt1, base2 + pos2, base2 + nxt2), btype
        pos1, pos2 = nxt1, nxt2
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def hunkinrange(hunk: Tuple[int, int], linerange: Range) -> bool:
    """Tell whether ``hunk`` (start, length) overlaps ``linerange``.

    ``linerange`` is a (lowerbound, upperbound) pair. The hunk is in range
    when the range starts before the hunk ends and the hunk starts before
    the range ends.

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    first, count = hunk
    low, high = linerange
    hunkend = first + count
    if low < hunkend:
        return first < high
    return False
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def blocksinrange(
    blocks: Iterable[TypedBlock], rangeb: Range
) -> Tuple[List[TypedBlock], Range]:
    """Keep the blocks touching ``rangeb`` and map that range to side "a".

    ``blocks`` yields ``((a1, a2, b1, b2), stype)`` items and ``rangeb`` is
    a line range expressed on the ``(b1, b2)`` side.

    Return ``filteredblocks, rangea`` where:

    * ``filteredblocks`` lists the items of ``blocks`` whose ``(b1, b2)``
      part is inside ``rangeb``, i.e. ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * ``rangea`` is the corresponding line range on the ``(a1, a2)`` side.

    Raises error.InputError when either bound of ``rangea`` could not be
    determined or the bounds come out reversed.
    """
    low_b, high_b = rangeb
    low_a = high_a = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == b'='
        if low_b >= b1 and high_b <= b2 and matching:
            # rangeb lies within a single "=" block: translate both bounds
            # by the offset between the two sides.
            low_a = low_b - b1 + a1
            high_a = high_b - b1 + a1
        else:
            if b1 <= low_b < b2:
                # lower bound falls in this block; a non-matching block
                # is taken from its start.
                low_a = a2 - (b2 - low_b) if matching else a1
            if b1 < high_b <= b2:
                # upper bound falls in this block; a non-matching block
                # is taken up to its end.
                high_a = a1 + (high_b - b1) if matching else a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            kept.append(block)
    if low_a is None or high_a is None or high_a < low_a:
        raise error.InputError(_(b'line range exceeds file size'))
    return kept, (low_a, high_a)
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def chooseblocksfunc(opts: Optional[diffopts] = None) -> intmod.BDiffBlocksFnc:
    """Pick the function used to compute matching blocks.

    Returns bdiff.xdiffblocks when ``opts`` is given, ``opts.xdiff`` is
    set and the bdiff module provides a usable ``xdiffblocks``; otherwise
    returns bdiff.blocks.
    """
    wants_xdiff = opts is not None and opts.xdiff
    if wants_xdiff and getattr(bdiff, 'xdiffblocks', None):
        return bdiff.xdiffblocks
    return bdiff.blocks
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def allblocks(
    text1: bytes,
    text2: bytes,
    opts: Optional[diffopts] = None,
    lines1: Optional[Sequence[bytes]] = None,
    lines2: Optional[Sequence[bytes]] = None,
) -> Iterable[TypedBlock]:
    """Yield (block, type) tuples covering both texts.

    ``block`` is an (a1, a2, b1, b2) line entry. ``type`` is b'=' for
    blocks reported as matching by the block function, b'!' for the
    non-matching gap between two matches, and b'~' for a gap whose two
    sides are equal once blank lines have been filtered (only when
    ``opts.ignoreblanklines`` is set).

    ``lines1`` and ``lines2`` are ``text1`` and ``text2`` split with
    splitnewlines(), if the caller already has them available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # The gap before the first match starts at line 0 on both sides.
    prev = (0, 0, 0, 0)
    for match in chooseblocksfunc(opts)(text1, text2):
        gap = (prev[1], match[0], prev[3], match[2])
        prev = match
        a1, a2, b1, b2 = gap

        # An empty gap (both sides zero-length) is skipped. This also
        # covers the first match when it starts at line 0, and per the
        # original comment eats huge matches bdiff may report past eof.
        if a1 != a2 or b1 != b2:
            btype = b'!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, b"".join(lines1[a1:a2]))
                new = wsclean(opts, b"".join(lines2[b1:b2]))
                if old == new:
                    btype = b'~'
            yield gap, btype
        yield match, b'='
Patrick Mezard
|
r15525 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def unidiff(
    a: bytes,
    ad: bytes,
    b: bytes,
    bd: bytes,
    fn1: bytes,
    fn2: bytes,
    binary: bool,
    opts: diffopts = defaultopts,
) -> Tuple[List[bytes], Iterable[Tuple[Optional[HunkRange], HunkLines]]]:
    """Return a unified diff of ``a`` and ``b`` as a (headers, hunks) tuple.

    ``ad``/``bd`` are the date strings and ``fn1``/``fn2`` the file names
    used in the "--- <original>" and "+++ <new>" header lines.

    If the diff is not null, ``headers`` is the list of those two header
    lines and ``hunks`` is an iterable of (hunkrange, hunklines) items as
    produced by _unidiff(). For binary content ``headers`` is empty and
    the single hunk has a None range and a "Binary file ... has changed"
    line. When there is nothing to report, both ``headers`` and ``hunks``
    are empty.

    Set binary=True if either a or b should be taken as a binary file.
    """

    def datetag(date: bytes, fn: Optional[bytes] = None):
        # Dates are only shown for non-git diffs that keep dates.
        if not (opts.git or opts.nodates):
            return b'\t%s' % date
        # Without a date, file names containing a space get a bare tab.
        return b'\t' if fn and b' ' in fn else b''

    def fileheader(marker, prefix, fn, date):
        return b"%s %s%s%s" % (marker, prefix, fn, datetag(date, fn))

    nodiff = [], ()
    if not a and not b:
        return nodiff

    aprefix, bprefix = (b'', b'') if opts.noprefix else (b'a/', b'b/')

    epoch = dateutil.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return nodiff
        headerlines = []
        hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
    elif not a:
        # Nothing on the old side: every line of ``b`` is an addition.
        added = splitnewlines(b)
        if a is None:
            oldline = b'--- /dev/null%s' % datetag(epoch)
        else:
            oldline = fileheader(b'---', aprefix, fn1, ad)
        headerlines = [oldline, fileheader(b'+++', bprefix, fn2, bd)]
        count = len(added)
        hunklines = [b"@@ -0,0 +1,%d @@\n" % count]
        hunklines.extend(b"+" + line for line in added)
        if not b.endswith(b'\n'):
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = (((0, 0, 1, count), hunklines),)
    elif not b:
        # Nothing on the new side: every line of ``a`` is a removal.
        removed = splitnewlines(a)
        oldline = fileheader(b'---', aprefix, fn1, ad)
        if b is None:
            newline = b'+++ /dev/null%s' % datetag(epoch)
        else:
            newline = fileheader(b'+++', bprefix, fn2, bd)
        headerlines = [oldline, newline]
        count = len(removed)
        hunklines = [b"@@ -1,%d +0,0 @@\n" % count]
        hunklines.extend(b"-" + line for line in removed)
        if not a.endswith(b'\n'):
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = (((1, count, 0, 0), hunklines),)
    else:
        hunks = _unidiff(a, b, opts=opts)
        # _unidiff() first yields a bool telling whether any hunk follows.
        if not next(hunks):
            return nodiff

        headerlines = [
            fileheader(b'---', aprefix, fn1, ad),
            fileheader(b'+++', bprefix, fn2, bd),
        ]

    # The possible bool is consumed from the iterator above in the `next()`
    # call.
    return headerlines, cast(
        "Iterable[Tuple[Optional[HunkRange], HunkLines]]", hunks
    )
mpm@selenic.com
|
r0 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def _unidiff(
    t1: bytes, t2: bytes, opts: diffopts = defaultopts
) -> Iterator[Union[bool, Tuple[HunkRange, HunkLines]]]:
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    The first value yielded is a bool: True when at least one hunk
    follows, False when there is none.

    Each hunk is a (hunkrange, hunklines) tuple where ``hunkrange`` is a
    tuple (s1, l1, s2, l2) holding the range information used to form the
    '@@ -s1,l1 +s2,l2 @@' header, and ``hunklines`` is a list of lines made
    of that header followed by context lines, deletions and additions.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(pos, limit):
        # end of the trailing context, clamped to ``limit``
        return min(pos + opts.context, limit)

    def contextstart(pos):
        # start of the leading context, clamped to the first line
        return max(pos - opts.context, 0)

    # [line where the last function-line search stopped, last line found]
    lastfunc = [0, b'']

    def buildhunk(
        hunk: Tuple[int, int, int, int, List[bytes]]
    ) -> Tuple[HunkRange, HunkLines]:
        astart, a2, bstart, b2, delta = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = b""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in range(astart - 1, lastpos - 1, -1):
                candidate = l1[i]
                if candidate[0:1].isalnum():
                    func = b' ' + candidate.rstrip()
                    # split long function name if ASCII. otherwise we have no
                    # idea where the multi-byte boundary is, so just leave it.
                    if encoding.isasciistr(func):
                        func = func[:41]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
        hunklines += delta
        hunklines += [b' ' + l1[x] for x in range(a2, aend)]

        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
            for i in reversed(range(len(hunklines))):
                if hunklines[i].startswith((b'-', b' ')):
                    if hunklines[i].startswith(b' '):
                        skip = True
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
            for i in reversed(range(len(hunklines))):
                if hunklines[i].startswith(b'+'):
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        return hunkrange, hunklines

    # allblocks() gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    hunk = None
    ignoredlines = 0
    started = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != b'!':
            if stype == b'~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue

        old = l1[a1:a2]
        new = l2[b1:b2]
        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)

        joined = False
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                joined = True
                astart = hunk[1]
                bstart = hunk[3]
            else:
                if not started:
                    started = True
                    yield True
                yield buildhunk(hunk)

        if joined:
            # we've joined the previous hunk, record the new ending points
            # and keep appending to the very same delta list.
            delta = hunk[4]
            hunk = (hunk[0], a2, hunk[2], b2, delta)
        else:
            # create a new hunk
            delta = []
            hunk = (astart, a2, bstart, b2, delta)

        delta.extend(b' ' + x for x in l1[astart:a1])
        delta.extend(b'-' + x for x in old)
        delta.extend(b'+' + x for x in new)

    if hunk:
        if not started:
            started = True
            yield True
        yield buildhunk(hunk)
    elif not started:
        yield False
mason@suse.com
|
r1637 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def b85diff(to: Optional[bytes], tn: Optional[bytes]) -> bytes:
    """Return a base85-encoded "GIT binary patch" carrying ``tn``.

    None is treated as empty content for either side. When the old and
    new contents are equal, b'' is returned. Otherwise the result is a
    ``literal`` patch: the zlib-compressed new content, base85-encoded in
    lines of at most 52 input bytes, followed by an empty line.
    """

    def fmtline(line):
        # The leading byte encodes the chunk length: 'A'.. for 1-26 bytes,
        # 'a'.. for 27 bytes and up.
        size = len(line)
        if size <= 26:
            marker = pycompat.bytechr(ord(b'A') + size - 1)
        else:
            marker = pycompat.bytechr(size - 26 + ord(b'a') - 1)
        return b'%c%s\n' % (marker, util.b85encode(line, True))

    def chunk(text, csize=52):
        for start in range(0, len(text), csize):
            yield text[start : start + csize]

    old = b'' if to is None else to
    new = b'' if tn is None else tn

    if old == new:
        return b''

    # TODO: deltas
    out = [b'GIT binary patch\n', b'literal %d\n' % len(new)]
    out.extend(fmtline(piece) for piece in chunk(zlib.compress(new)))
    out.append(b'\n')

    return b''.join(out)
Guillermo Pérez <bisho at fb.com>
|
r17939 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def patchtext(bin: bytes) -> bytes:
    """Concatenate the data payloads of every fragment in a binary patch.

    ``bin`` is read as a sequence of fragments, each made of a 12-byte
    header of three big-endian 32-bit signed integers followed by as many
    data bytes as the third integer says. The first two header fields are
    not used here (presumably the replaced range -- confirm against the
    patch format).
    """
    pieces = []
    offset = 0
    total = len(bin)
    while offset < total:
        header = bin[offset : offset + 12]
        _p1, _p2, size = struct.unpack(b">lll", header)
        offset += 12
        pieces.append(bin[offset : offset + size])
        offset += size
    return b"".join(pieces)
mpm@selenic.com
|
r120 | |||
Augie Fackler
|
r43346 | |||
mpm@selenic.com
|
def patch(a, bin):
    """Apply the binary patch ``bin`` to the text ``a``.

    When ``a`` is empty the result is util.buffer(bin, 12), which skips the
    12-byte trivial delta header instead of running the patcher.
    Otherwise the work is delegated to mpatch.patches().
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
mpm@selenic.com
|
r432 | |||
Augie Fackler
|
r43346 | |||
Alexis S. L. Carvalho
|
r4361 | # similar to difflib.SequenceMatcher.get_matching_blocks | ||
Matt Harbison
|
def get_matching_blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int]]:
    """Return bdiff's matching blocks as (start_a, start_b, length) triples.

    This mirrors the shape of difflib.SequenceMatcher.get_matching_blocks;
    the length is taken from the "a" side of each block.
    """
    return [(a1, b1, a2 - a1) for a1, a2, b1, _b2 in bdiff.blocks(a, b)]
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def trivialdiffheader(length: int) -> bytes:
    """Return the 12-byte delta header ``(0, 0, length)``.

    The three fields are packed as big-endian 32-bit signed integers. A
    zero ``length`` produces no header at all (b'').
    """
    if not length:
        return b''
    return struct.pack(b">lll", 0, 0, length)
Matt Mackall
|
r5367 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def replacediffheader(oldlen: int, newlen: int) -> bytes:
    """Return the 12-byte delta header ``(0, oldlen, newlen)``.

    The three fields are packed as big-endian 32-bit signed integers.
    """
    fields = (0, oldlen, newlen)
    return struct.pack(b">lll", *fields)