Show More
mdiff.py
457 lines
| 14.4 KiB
| text/x-python
|
PythonLexer
/ mercurial / mdiff.py
mpm@selenic.com
|
r239 | # mdiff.py - diff and patch routines for mercurial | ||
# | ||||
Vadim Gelfer
|
r2859 | # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r239 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
mpm@selenic.com
|
r239 | |||
Gregory Szorc
|
r27484 | from __future__ import absolute_import | ||
import re | ||||
import struct | ||||
import zlib | ||||
from .i18n import _ | ||||
from . import ( | ||||
base85, | ||||
bdiff, | ||||
error, | ||||
mpatch, | ||||
util, | ||||
) | ||||
mpm@selenic.com
|
r0 | |||
Vadim Gelfer
|
r2251 | def splitnewlines(text): | ||
Vadim Gelfer
|
r2248 | '''like str.splitlines, but only split on newlines.''' | ||
Vadim Gelfer
|
r2251 | lines = [l + '\n' for l in text.split('\n')] | ||
if lines: | ||||
if lines[-1] == '\n': | ||||
lines.pop() | ||||
else: | ||||
lines[-1] = lines[-1][:-1] | ||||
return lines | ||||
Vadim Gelfer
|
r2248 | |||
Vadim Gelfer
|
r2874 | class diffopts(object): | ||
'''context is the number of context lines | ||||
text treats all files as text | ||||
showfunc enables diff -p output | ||||
Brendan Cully
|
r2907 | git enables the git extended patch format | ||
Stephen Darnell
|
r3199 | nodates removes dates from diff headers | ||
Siddharth Agarwal
|
r23293 | nobinary ignores binary files | ||
Siddharth Agarwal
|
r23294 | noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode) | ||
Vadim Gelfer
|
r2874 | ignorews ignores all whitespace changes in the diff | ||
ignorewsamount ignores changes in the amount of whitespace | ||||
Patrick Mezard
|
r10189 | ignoreblanklines ignores changes whose lines are all blank | ||
upgrade generates git diffs to avoid data loss | ||||
''' | ||||
Thomas Arendsen Hein
|
r396 | |||
Vadim Gelfer
|
r2874 | defaults = { | ||
'context': 3, | ||||
'text': False, | ||||
Matt Mackall
|
r5863 | 'showfunc': False, | ||
Brendan Cully
|
r2907 | 'git': False, | ||
Stephen Darnell
|
r3199 | 'nodates': False, | ||
Stephen Lee
|
r21790 | 'nobinary': False, | ||
Siddharth Agarwal
|
r23294 | 'noprefix': False, | ||
Sean Farley
|
r30788 | 'index': 0, | ||
Vadim Gelfer
|
r2874 | 'ignorews': False, | ||
'ignorewsamount': False, | ||||
'ignoreblanklines': False, | ||||
Patrick Mezard
|
r10189 | 'upgrade': False, | ||
Sean Farley
|
r30806 | 'showsimilarity': False, | ||
Vadim Gelfer
|
r2874 | } | ||
def __init__(self, **opts): | ||||
Gregory Szorc
|
r29416 | for k in self.defaults.keys(): | ||
Vadim Gelfer
|
r2874 | v = opts.get(k) | ||
if v is None: | ||||
v = self.defaults[k] | ||||
setattr(self, k, v) | ||||
Patrick Mezard
|
r6467 | try: | ||
self.context = int(self.context) | ||||
except ValueError: | ||||
Pierre-Yves David
|
r26587 | raise error.Abort(_('diff context lines count must be ' | ||
Patrick Mezard
|
r6467 | 'an integer, not %r') % self.context) | ||
Patrick Mezard
|
r10185 | def copy(self, **kwargs): | ||
opts = dict((k, getattr(self, k)) for k in self.defaults) | ||||
opts.update(kwargs) | ||||
return diffopts(**opts) | ||||
Vadim Gelfer
|
r2874 | defaultopts = diffopts() | ||
Patrick Mezard
|
r9827 | def wsclean(opts, text, blank=True): | ||
Matt Mackall
|
r4878 | if opts.ignorews: | ||
Patrick Mezard
|
r15530 | text = bdiff.fixws(text, 1) | ||
Matt Mackall
|
r4878 | elif opts.ignorewsamount: | ||
Patrick Mezard
|
r15530 | text = bdiff.fixws(text, 0) | ||
Patrick Mezard
|
r9827 | if blank and opts.ignoreblanklines: | ||
Patrick Mezard
|
r15509 | text = re.sub('\n+', '\n', text).strip('\n') | ||
Matt Mackall
|
r4878 | return text | ||
Patrick Mezard
|
r15528 | def splitblock(base1, lines1, base2, lines2, opts): | ||
# The input lines matches except for interwoven blank lines. We | ||||
# transform it into a sequence of matching blocks and blank blocks. | ||||
lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1] | ||||
lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2] | ||||
s1, e1 = 0, len(lines1) | ||||
s2, e2 = 0, len(lines2) | ||||
while s1 < e1 or s2 < e2: | ||||
i1, i2, btype = s1, s2, '=' | ||||
if (i1 >= e1 or lines1[i1] == 0 | ||||
or i2 >= e2 or lines2[i2] == 0): | ||||
# Consume the block of blank lines | ||||
btype = '~' | ||||
while i1 < e1 and lines1[i1] == 0: | ||||
i1 += 1 | ||||
while i2 < e2 and lines2[i2] == 0: | ||||
i2 += 1 | ||||
else: | ||||
# Consume the matching lines | ||||
while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1: | ||||
i1 += 1 | ||||
i2 += 1 | ||||
yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype | ||||
s1 = i1 | ||||
s2 = i2 | ||||
Denis Laxalde
|
r30717 | def blocksinrange(blocks, rangeb): | ||
"""filter `blocks` like (a1, a2, b1, b2) from items outside line range | ||||
`rangeb` from ``(b1, b2)`` point of view. | ||||
Return `filteredblocks, rangea` where: | ||||
* `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of | ||||
`blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a | ||||
block ``(b1, b2)`` being inside `rangeb` if | ||||
``rangeb[0] < b2 and b1 < rangeb[1]``; | ||||
* `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`. | ||||
""" | ||||
lbb, ubb = rangeb | ||||
lba, uba = None, None | ||||
filteredblocks = [] | ||||
for block in blocks: | ||||
(a1, a2, b1, b2), stype = block | ||||
if lbb >= b1 and ubb <= b2 and stype == '=': | ||||
# rangeb is within a single "=" hunk, restrict back linerange1 | ||||
# by offsetting rangeb | ||||
lba = lbb - b1 + a1 | ||||
uba = ubb - b1 + a1 | ||||
else: | ||||
if b1 <= lbb < b2: | ||||
if stype == '=': | ||||
lba = a2 - (b2 - lbb) | ||||
else: | ||||
lba = a1 | ||||
if b1 < ubb <= b2: | ||||
if stype == '=': | ||||
uba = a1 + (ubb - b1) | ||||
else: | ||||
uba = a2 | ||||
if lbb < b2 and b1 < ubb: | ||||
filteredblocks.append(block) | ||||
if lba is None or uba is None or uba < lba: | ||||
raise error.Abort(_('line range exceeds file size')) | ||||
return filteredblocks, (lba, uba) | ||||
Philippe Pepiot
|
r30023 | def allblocks(text1, text2, opts=None, lines1=None, lines2=None): | ||
Patrick Mezard
|
r15526 | """Return (block, type) tuples, where block is an mdiff.blocks | ||
line entry. type is '=' for blocks matching exactly one another | ||||
(bdiff blocks), '!' for non-matching blocks and '~' for blocks | ||||
Philippe Pepiot
|
r30023 | matching only after having filtered blank lines. | ||
Patrick Mezard
|
r15526 | line1 and line2 are text1 and text2 split with splitnewlines() if | ||
they are already available. | ||||
Patrick Mezard
|
r15525 | """ | ||
if opts is None: | ||||
opts = defaultopts | ||||
if opts.ignorews or opts.ignorewsamount: | ||||
text1 = wsclean(opts, text1, False) | ||||
text2 = wsclean(opts, text2, False) | ||||
diff = bdiff.blocks(text1, text2) | ||||
for i, s1 in enumerate(diff): | ||||
# The first match is special. | ||||
# we've either found a match starting at line 0 or a match later | ||||
# in the file. If it starts later, old and new below will both be | ||||
# empty and we'll continue to the next match. | ||||
if i > 0: | ||||
s = diff[i - 1] | ||||
else: | ||||
s = [0, 0, 0, 0] | ||||
s = [s[1], s1[0], s[3], s1[2]] | ||||
# bdiff sometimes gives huge matches past eof, this check eats them, | ||||
# and deals with the special first match case described above | ||||
Patrick Mezard
|
r15529 | if s[0] != s[1] or s[2] != s[3]: | ||
Patrick Mezard
|
r15526 | type = '!' | ||
if opts.ignoreblanklines: | ||||
Patrick Mezard
|
r15529 | if lines1 is None: | ||
lines1 = splitnewlines(text1) | ||||
if lines2 is None: | ||||
lines2 = splitnewlines(text2) | ||||
old = wsclean(opts, "".join(lines1[s[0]:s[1]])) | ||||
new = wsclean(opts, "".join(lines2[s[2]:s[3]])) | ||||
if old == new: | ||||
Patrick Mezard
|
r15526 | type = '~' | ||
yield s, type | ||||
yield s1, '=' | ||||
Patrick Mezard
|
r15525 | |||
Guillermo Pérez
|
r17940 | def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts): | ||
Denis Laxalde
|
r31273 | """Return a unified diff as a (headers, hunks) tuple. | ||
Denis Laxalde
|
r31271 | |||
If the diff is not null, `headers` is a list with unified diff header | ||||
Denis Laxalde
|
r31273 | lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding | ||
(hunkrange, hunklines) coming from _unidiff(). | ||||
Otherwise, `headers` and `hunks` are empty. | ||||
Denis Laxalde
|
r31271 | """ | ||
Patrick Mezard
|
r16362 | def datetag(date, fn=None): | ||
Alexis S. L. Carvalho
|
r4679 | if not opts.git and not opts.nodates: | ||
Denis Laxalde
|
r31271 | return '\t%s' % date | ||
Patrick Mezard
|
r16362 | if fn and ' ' in fn: | ||
Denis Laxalde
|
r31271 | return '\t' | ||
return '' | ||||
Brendan Cully
|
r3026 | |||
Denis Laxalde
|
r31273 | sentinel = [], () | ||
Matt Mackall
|
r10282 | if not a and not b: | ||
Denis Laxalde
|
r31271 | return sentinel | ||
Siddharth Agarwal
|
r23299 | |||
if opts.noprefix: | ||||
aprefix = bprefix = '' | ||||
else: | ||||
aprefix = 'a/' | ||||
bprefix = 'b/' | ||||
Matt Mackall
|
r1379 | epoch = util.datestr((0, 0)) | ||
mpm@selenic.com
|
r264 | |||
Mads Kiilerich
|
r15437 | fn1 = util.pconvert(fn1) | ||
fn2 = util.pconvert(fn2) | ||||
Denis Laxalde
|
r31272 | def checknonewline(lines): | ||
for text in lines: | ||||
if text[-1] != '\n': | ||||
text += "\n\ No newline at end of file\n" | ||||
yield text | ||||
Vadim Gelfer
|
r2874 | if not opts.text and (util.binary(a) or util.binary(b)): | ||
Martin Geisler
|
r6871 | if a and b and len(a) == len(b) and a == b: | ||
Denis Laxalde
|
r31271 | return sentinel | ||
headerlines = [] | ||||
Denis Laxalde
|
r31273 | hunks = (None, ['Binary file %s has changed\n' % fn1]), | ||
Thomas Arendsen Hein
|
r1723 | elif not a: | ||
Vadim Gelfer
|
r2251 | b = splitnewlines(b) | ||
Thomas Arendsen Hein
|
r1723 | if a is None: | ||
Patrick Mezard
|
r16362 | l1 = '--- /dev/null%s' % datetag(epoch) | ||
Thomas Arendsen Hein
|
r1723 | else: | ||
Siddharth Agarwal
|
r23299 | l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)) | ||
l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2)) | ||||
Denis Laxalde
|
r31271 | headerlines = [l1, l2] | ||
Denis Laxalde
|
r31273 | size = len(b) | ||
hunkrange = (0, 0, 1, size) | ||||
hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b] | ||||
hunks = (hunkrange, checknonewline(hunklines)), | ||||
Thomas Arendsen Hein
|
r1723 | elif not b: | ||
Vadim Gelfer
|
r2251 | a = splitnewlines(a) | ||
Siddharth Agarwal
|
r23299 | l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)) | ||
Thomas Arendsen Hein
|
r1723 | if b is None: | ||
Patrick Mezard
|
r16362 | l2 = '+++ /dev/null%s' % datetag(epoch) | ||
Thomas Arendsen Hein
|
r1723 | else: | ||
Siddharth Agarwal
|
r23299 | l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)) | ||
Denis Laxalde
|
r31271 | headerlines = [l1, l2] | ||
Denis Laxalde
|
r31273 | size = len(a) | ||
hunkrange = (1, size, 0, 0) | ||||
hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a] | ||||
hunks = (hunkrange, checknonewline(hunklines)), | ||||
mpm@selenic.com
|
r264 | else: | ||
Denis Laxalde
|
r31273 | diffhunks = _unidiff(a, b, opts=opts) | ||
try: | ||||
hunkrange, hunklines = next(diffhunks) | ||||
except StopIteration: | ||||
Denis Laxalde
|
r31271 | return sentinel | ||
Benoit Boissinot
|
r10614 | |||
Denis Laxalde
|
r31271 | headerlines = [ | ||
"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)), | ||||
"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)), | ||||
] | ||||
Denis Laxalde
|
r31273 | def rewindhunks(): | ||
yield hunkrange, checknonewline(hunklines) | ||||
for hr, hl in diffhunks: | ||||
yield hr, checknonewline(hl) | ||||
mpm@selenic.com
|
r170 | |||
Denis Laxalde
|
r31273 | hunks = rewindhunks() | ||
return headerlines, hunks | ||||
mpm@selenic.com
|
r0 | |||
Denis Laxalde
|
r31267 | def _unidiff(t1, t2, opts=defaultopts): | ||
Denis Laxalde
|
r31269 | """Yield hunks of a headerless unified diff from t1 and t2 texts. | ||
Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a | ||||
tuple (s1, l1, s2, l2) representing the range information of the hunk to | ||||
form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines | ||||
of the hunk combining said header followed by line additions and | ||||
deletions. | ||||
""" | ||||
Denis Laxalde
|
r31267 | l1 = splitnewlines(t1) | ||
l2 = splitnewlines(t2) | ||||
mason@suse.com
|
r1637 | def contextend(l, len): | ||
Vadim Gelfer
|
r2874 | ret = l + opts.context | ||
mason@suse.com
|
r1637 | if ret > len: | ||
ret = len | ||||
return ret | ||||
def contextstart(l): | ||||
Vadim Gelfer
|
r2874 | ret = l - opts.context | ||
mason@suse.com
|
r1637 | if ret < 0: | ||
return 0 | ||||
return ret | ||||
Brodie Rao
|
r15141 | lastfunc = [0, ''] | ||
Benoit Boissinot
|
r10614 | def yieldhunk(hunk): | ||
mason@suse.com
|
r1637 | (astart, a2, bstart, b2, delta) = hunk | ||
aend = contextend(a2, len(l1)) | ||||
alen = aend - astart | ||||
blen = b2 - bstart + aend - a2 | ||||
func = "" | ||||
Vadim Gelfer
|
r2874 | if opts.showfunc: | ||
Brodie Rao
|
r15141 | lastpos, func = lastfunc | ||
# walk backwards from the start of the context up to the start of | ||||
# the previous hunk context until we find a line starting with an | ||||
# alphanumeric char. | ||||
for i in xrange(astart - 1, lastpos - 1, -1): | ||||
if l1[i][0].isalnum(): | ||||
func = ' ' + l1[i].rstrip()[:40] | ||||
lastfunc[1] = func | ||||
mason@suse.com
|
r1637 | break | ||
Brodie Rao
|
r15141 | # by recording this hunk's starting point as the next place to | ||
# start looking for function lines, we avoid reading any line in | ||||
# the file more than once. | ||||
lastfunc[0] = astart | ||||
mason@suse.com
|
r1637 | |||
Nicolas Venegas
|
r15462 | # zero-length hunk ranges report their start line as one less | ||
if alen: | ||||
astart += 1 | ||||
if blen: | ||||
bstart += 1 | ||||
Denis Laxalde
|
r31269 | hunkrange = astart, alen, bstart, blen | ||
hunklines = ( | ||||
["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))] | ||||
+ delta | ||||
+ [' ' + l1[x] for x in xrange(a2, aend)] | ||||
) | ||||
yield hunkrange, hunklines | ||||
mason@suse.com
|
r1637 | |||
# bdiff.blocks gives us the matching sequences in the files. The loop | ||||
# below finds the spaces between those matching sequences and translates | ||||
# them into diff output. | ||||
# | ||||
hunk = None | ||||
Patrick Mezard
|
r16089 | ignoredlines = 0 | ||
Patrick Mezard
|
r15526 | for s, stype in allblocks(t1, t2, opts, l1, l2): | ||
Patrick Mezard
|
r16089 | a1, a2, b1, b2 = s | ||
Patrick Mezard
|
r15526 | if stype != '!': | ||
Patrick Mezard
|
r16089 | if stype == '~': | ||
# The diff context lines are based on t1 content. When | ||||
# blank lines are ignored, the new lines offsets must | ||||
# be adjusted as if equivalent blocks ('~') had the | ||||
# same sizes on both sides. | ||||
ignoredlines += (b2 - b1) - (a2 - a1) | ||||
Patrick Mezard
|
r15526 | continue | ||
mason@suse.com
|
r1637 | delta = [] | ||
old = l1[a1:a2] | ||||
new = l2[b1:b2] | ||||
Patrick Mezard
|
r16089 | b1 -= ignoredlines | ||
b2 -= ignoredlines | ||||
mason@suse.com
|
r1637 | astart = contextstart(a1) | ||
bstart = contextstart(b1) | ||||
prev = None | ||||
if hunk: | ||||
# join with the previous hunk if it falls inside the context | ||||
Vadim Gelfer
|
r2874 | if astart < hunk[1] + opts.context + 1: | ||
mason@suse.com
|
r1637 | prev = hunk | ||
astart = hunk[1] | ||||
bstart = hunk[3] | ||||
else: | ||||
Benoit Boissinot
|
r10614 | for x in yieldhunk(hunk): | ||
mason@suse.com
|
r1637 | yield x | ||
if prev: | ||||
# we've joined the previous hunk, record the new ending points. | ||||
hunk[1] = a2 | ||||
hunk[3] = b2 | ||||
delta = hunk[4] | ||||
else: | ||||
# create a new hunk | ||||
Matt Mackall
|
r10282 | hunk = [astart, a2, bstart, b2, delta] | ||
mason@suse.com
|
r1637 | |||
Matt Mackall
|
r10282 | delta[len(delta):] = [' ' + x for x in l1[astart:a1]] | ||
delta[len(delta):] = ['-' + x for x in old] | ||||
delta[len(delta):] = ['+' + x for x in new] | ||||
mason@suse.com
|
r1637 | |||
if hunk: | ||||
Benoit Boissinot
|
r10614 | for x in yieldhunk(hunk): | ||
mason@suse.com
|
r1637 | yield x | ||
Guillermo Pérez <bisho at fb.com>
|
r17939 | def b85diff(to, tn): | ||
'''print base85-encoded binary diff''' | ||||
def fmtline(line): | ||||
l = len(line) | ||||
if l <= 26: | ||||
l = chr(ord('A') + l - 1) | ||||
else: | ||||
l = chr(l - 26 + ord('a') - 1) | ||||
return '%c%s\n' % (l, base85.b85encode(line, True)) | ||||
def chunk(text, csize=52): | ||||
l = len(text) | ||||
i = 0 | ||||
while i < l: | ||||
yield text[i:i + csize] | ||||
i += csize | ||||
Guillermo Pérez
|
r17946 | if to is None: | ||
to = '' | ||||
if tn is None: | ||||
tn = '' | ||||
if to == tn: | ||||
return '' | ||||
Guillermo Pérez <bisho at fb.com>
|
r17939 | |||
# TODO: deltas | ||||
Guillermo Pérez
|
r17946 | ret = [] | ||
ret.append('GIT binary patch\n') | ||||
ret.append('literal %s\n' % len(tn)) | ||||
Guillermo Pérez <bisho at fb.com>
|
r17939 | for l in chunk(zlib.compress(tn)): | ||
ret.append(fmtline(l)) | ||||
ret.append('\n') | ||||
Guillermo Pérez
|
r17946 | |||
Guillermo Pérez <bisho at fb.com>
|
r17939 | return ''.join(ret) | ||
mpm@selenic.com
|
r120 | def patchtext(bin): | ||
pos = 0 | ||||
t = [] | ||||
while pos < len(bin): | ||||
p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) | ||||
pos += 12 | ||||
t.append(bin[pos:pos + l]) | ||||
pos += l | ||||
return "".join(t) | ||||
mpm@selenic.com
|
r0 | def patch(a, bin): | ||
Benoit Boissinot
|
r12025 | if len(a) == 0: | ||
# skip over trivial delta header | ||||
Matt Mackall
|
r15657 | return util.buffer(bin, 12) | ||
Matt Mackall
|
r1379 | return mpatch.patches(a, [bin]) | ||
mpm@selenic.com
|
r432 | |||
Alexis S. L. Carvalho
|
r4361 | # similar to difflib.SequenceMatcher.get_matching_blocks | ||
def get_matching_blocks(a, b): | ||||
return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)] | ||||
Matt Mackall
|
r5367 | def trivialdiffheader(length): | ||
Mike Hommey
|
r27711 | return struct.pack(">lll", 0, 0, length) if length else '' | ||
Matt Mackall
|
r5367 | |||
Mike Edgar
|
r24119 | def replacediffheader(oldlen, newlen): | ||
return struct.pack(">lll", 0, oldlen, newlen) | ||||
Matt Mackall
|
r1379 | patches = mpatch.patches | ||
mason@suse.com
|
r2078 | patchedsize = mpatch.patchedsize | ||
mpm@selenic.com
|
r432 | textdiff = bdiff.bdiff | ||