mdiff.py
361 lines
| 11.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / mdiff.py
mpm@selenic.com
|
r239 | # mdiff.py - diff and patch routines for mercurial | ||
# | ||||
Vadim Gelfer
|
r2859 | # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r239 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
mpm@selenic.com
|
r239 | |||
Patrick Mezard
|
r6467 | from i18n import _ | ||
Simon Heimberg
|
r8312 | import bdiff, mpatch, util | ||
Guillermo Pérez <bisho at fb.com>
|
r17939 | import re, struct, base85, zlib | ||
mpm@selenic.com
|
r0 | |||
Vadim Gelfer
|
r2251 | def splitnewlines(text): | ||
Vadim Gelfer
|
r2248 | '''like str.splitlines, but only split on newlines.''' | ||
Vadim Gelfer
|
r2251 | lines = [l + '\n' for l in text.split('\n')] | ||
if lines: | ||||
if lines[-1] == '\n': | ||||
lines.pop() | ||||
else: | ||||
lines[-1] = lines[-1][:-1] | ||||
return lines | ||||
Vadim Gelfer
|
r2248 | |||
Vadim Gelfer
|
r2874 | class diffopts(object): | ||
'''context is the number of context lines | ||||
text treats all files as text | ||||
showfunc enables diff -p output | ||||
Brendan Cully
|
r2907 | git enables the git extended patch format | ||
Stephen Darnell
|
r3199 | nodates removes dates from diff headers | ||
Vadim Gelfer
|
r2874 | ignorews ignores all whitespace changes in the diff | ||
ignorewsamount ignores changes in the amount of whitespace | ||||
Patrick Mezard
|
r10189 | ignoreblanklines ignores changes whose lines are all blank | ||
upgrade generates git diffs to avoid data loss | ||||
''' | ||||
Thomas Arendsen Hein
|
r396 | |||
Vadim Gelfer
|
r2874 | defaults = { | ||
'context': 3, | ||||
'text': False, | ||||
Matt Mackall
|
r5863 | 'showfunc': False, | ||
Brendan Cully
|
r2907 | 'git': False, | ||
Stephen Darnell
|
r3199 | 'nodates': False, | ||
Vadim Gelfer
|
r2874 | 'ignorews': False, | ||
'ignorewsamount': False, | ||||
'ignoreblanklines': False, | ||||
Patrick Mezard
|
r10189 | 'upgrade': False, | ||
Vadim Gelfer
|
r2874 | } | ||
__slots__ = defaults.keys() | ||||
def __init__(self, **opts): | ||||
for k in self.__slots__: | ||||
v = opts.get(k) | ||||
if v is None: | ||||
v = self.defaults[k] | ||||
setattr(self, k, v) | ||||
Patrick Mezard
|
r6467 | try: | ||
self.context = int(self.context) | ||||
except ValueError: | ||||
raise util.Abort(_('diff context lines count must be ' | ||||
'an integer, not %r') % self.context) | ||||
Patrick Mezard
|
r10185 | def copy(self, **kwargs): | ||
opts = dict((k, getattr(self, k)) for k in self.defaults) | ||||
opts.update(kwargs) | ||||
return diffopts(**opts) | ||||
Vadim Gelfer
|
r2874 | defaultopts = diffopts() | ||
Patrick Mezard
|
r9827 | def wsclean(opts, text, blank=True): | ||
Matt Mackall
|
r4878 | if opts.ignorews: | ||
Patrick Mezard
|
r15530 | text = bdiff.fixws(text, 1) | ||
Matt Mackall
|
r4878 | elif opts.ignorewsamount: | ||
Patrick Mezard
|
r15530 | text = bdiff.fixws(text, 0) | ||
Patrick Mezard
|
r9827 | if blank and opts.ignoreblanklines: | ||
Patrick Mezard
|
r15509 | text = re.sub('\n+', '\n', text).strip('\n') | ||
Matt Mackall
|
r4878 | return text | ||
Patrick Mezard
|
r15528 | def splitblock(base1, lines1, base2, lines2, opts): | ||
# The input lines matches except for interwoven blank lines. We | ||||
# transform it into a sequence of matching blocks and blank blocks. | ||||
lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1] | ||||
lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2] | ||||
s1, e1 = 0, len(lines1) | ||||
s2, e2 = 0, len(lines2) | ||||
while s1 < e1 or s2 < e2: | ||||
i1, i2, btype = s1, s2, '=' | ||||
if (i1 >= e1 or lines1[i1] == 0 | ||||
or i2 >= e2 or lines2[i2] == 0): | ||||
# Consume the block of blank lines | ||||
btype = '~' | ||||
while i1 < e1 and lines1[i1] == 0: | ||||
i1 += 1 | ||||
while i2 < e2 and lines2[i2] == 0: | ||||
i2 += 1 | ||||
else: | ||||
# Consume the matching lines | ||||
while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1: | ||||
i1 += 1 | ||||
i2 += 1 | ||||
yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype | ||||
s1 = i1 | ||||
s2 = i2 | ||||
def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False): | ||||
Patrick Mezard
|
r15526 | """Return (block, type) tuples, where block is an mdiff.blocks | ||
line entry. type is '=' for blocks matching exactly one another | ||||
(bdiff blocks), '!' for non-matching blocks and '~' for blocks | ||||
Patrick Mezard
|
r15528 | matching only after having filtered blank lines. If refine is True, | ||
then '~' blocks are refined and are only made of blank lines. | ||||
Patrick Mezard
|
r15526 | line1 and line2 are text1 and text2 split with splitnewlines() if | ||
they are already available. | ||||
Patrick Mezard
|
r15525 | """ | ||
if opts is None: | ||||
opts = defaultopts | ||||
if opts.ignorews or opts.ignorewsamount: | ||||
text1 = wsclean(opts, text1, False) | ||||
text2 = wsclean(opts, text2, False) | ||||
diff = bdiff.blocks(text1, text2) | ||||
for i, s1 in enumerate(diff): | ||||
# The first match is special. | ||||
# we've either found a match starting at line 0 or a match later | ||||
# in the file. If it starts later, old and new below will both be | ||||
# empty and we'll continue to the next match. | ||||
if i > 0: | ||||
s = diff[i - 1] | ||||
else: | ||||
s = [0, 0, 0, 0] | ||||
s = [s[1], s1[0], s[3], s1[2]] | ||||
# bdiff sometimes gives huge matches past eof, this check eats them, | ||||
# and deals with the special first match case described above | ||||
Patrick Mezard
|
r15529 | if s[0] != s[1] or s[2] != s[3]: | ||
Patrick Mezard
|
r15526 | type = '!' | ||
if opts.ignoreblanklines: | ||||
Patrick Mezard
|
r15529 | if lines1 is None: | ||
lines1 = splitnewlines(text1) | ||||
if lines2 is None: | ||||
lines2 = splitnewlines(text2) | ||||
old = wsclean(opts, "".join(lines1[s[0]:s[1]])) | ||||
new = wsclean(opts, "".join(lines2[s[2]:s[3]])) | ||||
if old == new: | ||||
Patrick Mezard
|
r15526 | type = '~' | ||
yield s, type | ||||
yield s1, '=' | ||||
Patrick Mezard
|
r15525 | |||
Guillermo Pérez
|
r17940 | def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts): | ||
Patrick Mezard
|
r16362 | def datetag(date, fn=None): | ||
Alexis S. L. Carvalho
|
r4679 | if not opts.git and not opts.nodates: | ||
return '\t%s\n' % date | ||||
Patrick Mezard
|
r16362 | if fn and ' ' in fn: | ||
Alexis S. L. Carvalho
|
r4679 | return '\t\n' | ||
return '\n' | ||||
Brendan Cully
|
r3026 | |||
Matt Mackall
|
r10282 | if not a and not b: | ||
return "" | ||||
Matt Mackall
|
r1379 | epoch = util.datestr((0, 0)) | ||
mpm@selenic.com
|
r264 | |||
Mads Kiilerich
|
r15437 | fn1 = util.pconvert(fn1) | ||
fn2 = util.pconvert(fn2) | ||||
Vadim Gelfer
|
r2874 | if not opts.text and (util.binary(a) or util.binary(b)): | ||
Martin Geisler
|
r6871 | if a and b and len(a) == len(b) and a == b: | ||
tailgunner@smtp.ru
|
r4103 | return "" | ||
Dustin Sallings
|
r5482 | l = ['Binary file %s has changed\n' % fn1] | ||
Thomas Arendsen Hein
|
r1723 | elif not a: | ||
Vadim Gelfer
|
r2251 | b = splitnewlines(b) | ||
Thomas Arendsen Hein
|
r1723 | if a is None: | ||
Patrick Mezard
|
r16362 | l1 = '--- /dev/null%s' % datetag(epoch) | ||
Thomas Arendsen Hein
|
r1723 | else: | ||
Patrick Mezard
|
r16362 | l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1)) | ||
l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2)) | ||||
mpm@selenic.com
|
r264 | l3 = "@@ -0,0 +1,%d @@\n" % len(b) | ||
l = [l1, l2, l3] + ["+" + e for e in b] | ||||
Thomas Arendsen Hein
|
r1723 | elif not b: | ||
Vadim Gelfer
|
r2251 | a = splitnewlines(a) | ||
Patrick Mezard
|
r16362 | l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1)) | ||
Thomas Arendsen Hein
|
r1723 | if b is None: | ||
Patrick Mezard
|
r16362 | l2 = '+++ /dev/null%s' % datetag(epoch) | ||
Thomas Arendsen Hein
|
r1723 | else: | ||
Patrick Mezard
|
r16362 | l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2)) | ||
mpm@selenic.com
|
r264 | l3 = "@@ -1,%d +0,0 @@\n" % len(a) | ||
l = [l1, l2, l3] + ["-" + e for e in a] | ||||
else: | ||||
Vadim Gelfer
|
r2251 | al = splitnewlines(a) | ||
bl = splitnewlines(b) | ||||
Benoit Boissinot
|
r10614 | l = list(_unidiff(a, b, al, bl, opts=opts)) | ||
Matt Mackall
|
r10282 | if not l: | ||
return "" | ||||
Benoit Boissinot
|
r10614 | |||
Patrick Mezard
|
r16362 | l.insert(0, "--- a/%s%s" % (fn1, datetag(ad, fn1))) | ||
l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd, fn2))) | ||||
mpm@selenic.com
|
r170 | |||
for ln in xrange(len(l)): | ||||
if l[ln][-1] != '\n': | ||||
l[ln] += "\n\ No newline at end of file\n" | ||||
mpm@selenic.com
|
r0 | return "".join(l) | ||
Benoit Boissinot
|
r10614 | # creates a headerless unified diff | ||
mason@suse.com
|
r1637 | # t1 and t2 are the text to be diffed | ||
# l1 and l2 are the text broken up into lines | ||||
Benoit Boissinot
|
r10614 | def _unidiff(t1, t2, l1, l2, opts=defaultopts): | ||
mason@suse.com
|
r1637 | def contextend(l, len): | ||
Vadim Gelfer
|
r2874 | ret = l + opts.context | ||
mason@suse.com
|
r1637 | if ret > len: | ||
ret = len | ||||
return ret | ||||
def contextstart(l): | ||||
Vadim Gelfer
|
r2874 | ret = l - opts.context | ||
mason@suse.com
|
r1637 | if ret < 0: | ||
return 0 | ||||
return ret | ||||
Brodie Rao
|
r15141 | lastfunc = [0, ''] | ||
Benoit Boissinot
|
r10614 | def yieldhunk(hunk): | ||
mason@suse.com
|
r1637 | (astart, a2, bstart, b2, delta) = hunk | ||
aend = contextend(a2, len(l1)) | ||||
alen = aend - astart | ||||
blen = b2 - bstart + aend - a2 | ||||
func = "" | ||||
Vadim Gelfer
|
r2874 | if opts.showfunc: | ||
Brodie Rao
|
r15141 | lastpos, func = lastfunc | ||
# walk backwards from the start of the context up to the start of | ||||
# the previous hunk context until we find a line starting with an | ||||
# alphanumeric char. | ||||
for i in xrange(astart - 1, lastpos - 1, -1): | ||||
if l1[i][0].isalnum(): | ||||
func = ' ' + l1[i].rstrip()[:40] | ||||
lastfunc[1] = func | ||||
mason@suse.com
|
r1637 | break | ||
Brodie Rao
|
r15141 | # by recording this hunk's starting point as the next place to | ||
# start looking for function lines, we avoid reading any line in | ||||
# the file more than once. | ||||
lastfunc[0] = astart | ||||
mason@suse.com
|
r1637 | |||
Nicolas Venegas
|
r15462 | # zero-length hunk ranges report their start line as one less | ||
if alen: | ||||
astart += 1 | ||||
if blen: | ||||
bstart += 1 | ||||
yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen, | ||||
bstart, blen, func) | ||||
mason@suse.com
|
r1637 | for x in delta: | ||
yield x | ||||
for x in xrange(a2, aend): | ||||
yield ' ' + l1[x] | ||||
# bdiff.blocks gives us the matching sequences in the files. The loop | ||||
# below finds the spaces between those matching sequences and translates | ||||
# them into diff output. | ||||
# | ||||
hunk = None | ||||
Patrick Mezard
|
r16089 | ignoredlines = 0 | ||
Patrick Mezard
|
r15526 | for s, stype in allblocks(t1, t2, opts, l1, l2): | ||
Patrick Mezard
|
r16089 | a1, a2, b1, b2 = s | ||
Patrick Mezard
|
r15526 | if stype != '!': | ||
Patrick Mezard
|
r16089 | if stype == '~': | ||
# The diff context lines are based on t1 content. When | ||||
# blank lines are ignored, the new lines offsets must | ||||
# be adjusted as if equivalent blocks ('~') had the | ||||
# same sizes on both sides. | ||||
ignoredlines += (b2 - b1) - (a2 - a1) | ||||
Patrick Mezard
|
r15526 | continue | ||
mason@suse.com
|
r1637 | delta = [] | ||
old = l1[a1:a2] | ||||
new = l2[b1:b2] | ||||
Patrick Mezard
|
r16089 | b1 -= ignoredlines | ||
b2 -= ignoredlines | ||||
mason@suse.com
|
r1637 | astart = contextstart(a1) | ||
bstart = contextstart(b1) | ||||
prev = None | ||||
if hunk: | ||||
# join with the previous hunk if it falls inside the context | ||||
Vadim Gelfer
|
r2874 | if astart < hunk[1] + opts.context + 1: | ||
mason@suse.com
|
r1637 | prev = hunk | ||
astart = hunk[1] | ||||
bstart = hunk[3] | ||||
else: | ||||
Benoit Boissinot
|
r10614 | for x in yieldhunk(hunk): | ||
mason@suse.com
|
r1637 | yield x | ||
if prev: | ||||
# we've joined the previous hunk, record the new ending points. | ||||
hunk[1] = a2 | ||||
hunk[3] = b2 | ||||
delta = hunk[4] | ||||
else: | ||||
# create a new hunk | ||||
Matt Mackall
|
r10282 | hunk = [astart, a2, bstart, b2, delta] | ||
mason@suse.com
|
r1637 | |||
Matt Mackall
|
r10282 | delta[len(delta):] = [' ' + x for x in l1[astart:a1]] | ||
delta[len(delta):] = ['-' + x for x in old] | ||||
delta[len(delta):] = ['+' + x for x in new] | ||||
mason@suse.com
|
r1637 | |||
if hunk: | ||||
Benoit Boissinot
|
r10614 | for x in yieldhunk(hunk): | ||
mason@suse.com
|
r1637 | yield x | ||
Guillermo Pérez <bisho at fb.com>
|
r17939 | def b85diff(to, tn): | ||
'''print base85-encoded binary diff''' | ||||
def fmtline(line): | ||||
l = len(line) | ||||
if l <= 26: | ||||
l = chr(ord('A') + l - 1) | ||||
else: | ||||
l = chr(l - 26 + ord('a') - 1) | ||||
return '%c%s\n' % (l, base85.b85encode(line, True)) | ||||
def chunk(text, csize=52): | ||||
l = len(text) | ||||
i = 0 | ||||
while i < l: | ||||
yield text[i:i + csize] | ||||
i += csize | ||||
Guillermo Pérez
|
r17946 | if to is None: | ||
to = '' | ||||
if tn is None: | ||||
tn = '' | ||||
if to == tn: | ||||
return '' | ||||
Guillermo Pérez <bisho at fb.com>
|
r17939 | |||
# TODO: deltas | ||||
Guillermo Pérez
|
r17946 | ret = [] | ||
ret.append('GIT binary patch\n') | ||||
ret.append('literal %s\n' % len(tn)) | ||||
Guillermo Pérez <bisho at fb.com>
|
r17939 | for l in chunk(zlib.compress(tn)): | ||
ret.append(fmtline(l)) | ||||
ret.append('\n') | ||||
Guillermo Pérez
|
r17946 | |||
Guillermo Pérez <bisho at fb.com>
|
r17939 | return ''.join(ret) | ||
mpm@selenic.com
|
r120 | def patchtext(bin): | ||
pos = 0 | ||||
t = [] | ||||
while pos < len(bin): | ||||
p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) | ||||
pos += 12 | ||||
t.append(bin[pos:pos + l]) | ||||
pos += l | ||||
return "".join(t) | ||||
mpm@selenic.com
|
r0 | def patch(a, bin): | ||
Benoit Boissinot
|
r12025 | if len(a) == 0: | ||
# skip over trivial delta header | ||||
Matt Mackall
|
r15657 | return util.buffer(bin, 12) | ||
Matt Mackall
|
r1379 | return mpatch.patches(a, [bin]) | ||
mpm@selenic.com
|
r432 | |||
Alexis S. L. Carvalho
|
r4361 | # similar to difflib.SequenceMatcher.get_matching_blocks | ||
def get_matching_blocks(a, b): | ||||
return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)] | ||||
Matt Mackall
|
r5367 | def trivialdiffheader(length): | ||
return struct.pack(">lll", 0, 0, length) | ||||
Matt Mackall
|
r1379 | patches = mpatch.patches | ||
mason@suse.com
|
r2078 | patchedsize = mpatch.patchedsize | ||
mpm@selenic.com
|
r432 | textdiff = bdiff.bdiff | ||