simplemerge.py
510 lines
| 16.0 KiB
| text/x-python
|
PythonLexer
/ mercurial / simplemerge.py
Matt Mackall
|
r6002 | # Copyright (C) 2004, 2005 Canonical Ltd | ||
# | ||||
# This program is free software; you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation; either version 2 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
Martin Geisler
|
r15782 | # along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
Matt Mackall
|
r6002 | |||
# mbp: "you know that thing where cvs gives you conflict markers?" | ||||
# s: "i hate that." | ||||
Gregory Szorc
|
r25974 | from __future__ import absolute_import | ||
from .i18n import _ | ||||
from . import ( | ||||
Pierre-Yves David
|
r26587 | error, | ||
Gregory Szorc
|
r25974 | mdiff, | ||
Pulkit Goyal
|
r33017 | pycompat, | ||
Yuya Nishihara
|
r37102 | ) | ||
Augie Fackler
|
r43346 | from .utils import stringutil | ||
Matt Mackall
|
r6002 | |||
class CantReprocessAndShowBase(Exception): | ||||
pass | ||||
Augie Fackler
|
r43346 | |||
Matt Mackall
|
r6002 | def intersect(ra, rb): | ||
"""Given two ranges return the range where they intersect or None. | ||||
>>> intersect((0, 10), (0, 6)) | ||||
(0, 6) | ||||
>>> intersect((0, 10), (5, 15)) | ||||
(5, 10) | ||||
>>> intersect((0, 10), (10, 15)) | ||||
>>> intersect((0, 9), (10, 15)) | ||||
>>> intersect((0, 9), (7, 15)) | ||||
(7, 9) | ||||
""" | ||||
assert ra[0] <= ra[1] | ||||
assert rb[0] <= rb[1] | ||||
sa = max(ra[0], rb[0]) | ||||
sb = min(ra[1], rb[1]) | ||||
if sa < sb: | ||||
return sa, sb | ||||
else: | ||||
return None | ||||
Augie Fackler
|
r43346 | |||
Matt Mackall
|
r6002 | def compare_range(a, astart, aend, b, bstart, bend): | ||
"""Compare a[astart:aend] == b[bstart:bend], without slicing. | ||||
""" | ||||
Matt Mackall
|
r10282 | if (aend - astart) != (bend - bstart): | ||
Matt Mackall
|
r6002 | return False | ||
Augie Fackler
|
r43346 | for ia, ib in zip( | ||
pycompat.xrange(astart, aend), pycompat.xrange(bstart, bend) | ||||
): | ||||
Matt Mackall
|
r6002 | if a[ia] != b[ib]: | ||
return False | ||||
else: | ||||
return True | ||||
Augie Fackler
|
r43346 | |||
Matt Mackall
|
r6002 | class Merge3Text(object): | ||
"""3-way merge of texts. | ||||
Given strings BASE, OTHER, THIS, tries to produce a combined text | ||||
incorporating the changes from both BASE->OTHER and BASE->THIS.""" | ||||
Augie Fackler
|
r43346 | |||
Matt Mackall
|
r6002 | def __init__(self, basetext, atext, btext, base=None, a=None, b=None): | ||
self.basetext = basetext | ||||
self.atext = atext | ||||
self.btext = btext | ||||
if base is None: | ||||
base = mdiff.splitnewlines(basetext) | ||||
if a is None: | ||||
a = mdiff.splitnewlines(atext) | ||||
if b is None: | ||||
b = mdiff.splitnewlines(btext) | ||||
self.base = base | ||||
self.a = a | ||||
self.b = b | ||||
Augie Fackler
|
r43346 | def merge_lines( | ||
self, | ||||
name_a=None, | ||||
name_b=None, | ||||
name_base=None, | ||||
Augie Fackler
|
r43347 | start_marker=b'<<<<<<<', | ||
mid_marker=b'=======', | ||||
end_marker=b'>>>>>>>', | ||||
Augie Fackler
|
r43346 | base_marker=None, | ||
localorother=None, | ||||
minimize=False, | ||||
): | ||||
Matt Mackall
|
r6002 | """Return merge in cvs-like form. | ||
""" | ||||
self.conflicts = False | ||||
Augie Fackler
|
r43347 | newline = b'\n' | ||
Matt Mackall
|
r6002 | if len(self.a) > 0: | ||
Augie Fackler
|
r43347 | if self.a[0].endswith(b'\r\n'): | ||
newline = b'\r\n' | ||||
elif self.a[0].endswith(b'\r'): | ||||
newline = b'\r' | ||||
Erik Huelsmann
|
r26069 | if name_a and start_marker: | ||
Augie Fackler
|
r43347 | start_marker = start_marker + b' ' + name_a | ||
Erik Huelsmann
|
r26069 | if name_b and end_marker: | ||
Augie Fackler
|
r43347 | end_marker = end_marker + b' ' + name_b | ||
Matt Mackall
|
r6002 | if name_base and base_marker: | ||
Augie Fackler
|
r43347 | base_marker = base_marker + b' ' + name_base | ||
Matt Mackall
|
r6002 | merge_regions = self.merge_regions() | ||
Ryan McElroy
|
r28072 | if minimize: | ||
merge_regions = self.minimize(merge_regions) | ||||
Matt Mackall
|
r6002 | for t in merge_regions: | ||
what = t[0] | ||||
Augie Fackler
|
r43347 | if what == b'unchanged': | ||
Matt Mackall
|
r6002 | for i in range(t[1], t[2]): | ||
yield self.base[i] | ||||
Augie Fackler
|
r43347 | elif what == b'a' or what == b'same': | ||
Matt Mackall
|
r6002 | for i in range(t[1], t[2]): | ||
yield self.a[i] | ||||
Augie Fackler
|
r43347 | elif what == b'b': | ||
Matt Mackall
|
r6002 | for i in range(t[1], t[2]): | ||
yield self.b[i] | ||||
Augie Fackler
|
r43347 | elif what == b'conflict': | ||
if localorother == b'local': | ||||
Jordi Gutiérrez Hermoso
|
r26223 | for i in range(t[3], t[4]): | ||
yield self.a[i] | ||||
Augie Fackler
|
r43347 | elif localorother == b'other': | ||
Jordi Gutiérrez Hermoso
|
r26223 | for i in range(t[5], t[6]): | ||
yield self.b[i] | ||||
else: | ||||
self.conflicts = True | ||||
if start_marker is not None: | ||||
yield start_marker + newline | ||||
for i in range(t[3], t[4]): | ||||
yield self.a[i] | ||||
if base_marker is not None: | ||||
yield base_marker + newline | ||||
for i in range(t[1], t[2]): | ||||
yield self.base[i] | ||||
if mid_marker is not None: | ||||
yield mid_marker + newline | ||||
for i in range(t[5], t[6]): | ||||
yield self.b[i] | ||||
if end_marker is not None: | ||||
yield end_marker + newline | ||||
Matt Mackall
|
r6002 | else: | ||
raise ValueError(what) | ||||
def merge_groups(self): | ||||
"""Yield sequence of line groups. Each one is a tuple: | ||||
'unchanged', lines | ||||
Lines unchanged from base | ||||
'a', lines | ||||
Lines taken from a | ||||
'same', lines | ||||
Lines taken from a (and equal to b) | ||||
'b', lines | ||||
Lines taken from b | ||||
'conflict', base_lines, a_lines, b_lines | ||||
Lines from base were changed to either a or b and conflict. | ||||
""" | ||||
for t in self.merge_regions(): | ||||
what = t[0] | ||||
Augie Fackler
|
r43347 | if what == b'unchanged': | ||
Augie Fackler
|
r43346 | yield what, self.base[t[1] : t[2]] | ||
Augie Fackler
|
r43347 | elif what == b'a' or what == b'same': | ||
Augie Fackler
|
r43346 | yield what, self.a[t[1] : t[2]] | ||
Augie Fackler
|
r43347 | elif what == b'b': | ||
Augie Fackler
|
r43346 | yield what, self.b[t[1] : t[2]] | ||
Augie Fackler
|
r43347 | elif what == b'conflict': | ||
Augie Fackler
|
r43346 | yield ( | ||
what, | ||||
self.base[t[1] : t[2]], | ||||
self.a[t[3] : t[4]], | ||||
self.b[t[5] : t[6]], | ||||
) | ||||
Matt Mackall
|
r6002 | else: | ||
raise ValueError(what) | ||||
def merge_regions(self): | ||||
"""Return sequences of matching and conflicting regions. | ||||
This returns tuples, where the first value says what kind we | ||||
have: | ||||
'unchanged', start, end | ||||
Take a region of base[start:end] | ||||
'same', astart, aend | ||||
b and a are different from base but give the same result | ||||
'a', start, end | ||||
Non-clashing insertion from a[start:end] | ||||
Ryan McElroy
|
r28070 | 'conflict', zstart, zend, astart, aend, bstart, bend | ||
Conflict between a and b, with z as common ancestor | ||||
Matt Mackall
|
r6002 | Method is as follows: | ||
The two sequences align only on regions which match the base | ||||
Matt Mackall
|
r14549 | and both descendants. These are found by doing a two-way diff | ||
Matt Mackall
|
r6002 | of each one against the base, and then finding the | ||
intersections between those regions. These "sync regions" | ||||
are by definition unchanged in both and easily dealt with. | ||||
The regions in between can be in any of three cases: | ||||
conflicted, or changed on only one side. | ||||
""" | ||||
# section a[0:ia] has been disposed of, etc | ||||
iz = ia = ib = 0 | ||||
Brodie Rao
|
r16683 | for region in self.find_sync_regions(): | ||
zmatch, zend, amatch, aend, bmatch, bend = region | ||||
Augie Fackler
|
r43346 | # print 'match base [%d:%d]' % (zmatch, zend) | ||
Matt Mackall
|
r6002 | |||
matchlen = zend - zmatch | ||||
assert matchlen >= 0 | ||||
assert matchlen == (aend - amatch) | ||||
assert matchlen == (bend - bmatch) | ||||
len_a = amatch - ia | ||||
len_b = bmatch - ib | ||||
len_base = zmatch - iz | ||||
assert len_a >= 0 | ||||
assert len_b >= 0 | ||||
assert len_base >= 0 | ||||
Augie Fackler
|
r43346 | # print 'unmatched a=%d, b=%d' % (len_a, len_b) | ||
Matt Mackall
|
r6002 | |||
if len_a or len_b: | ||||
# try to avoid actually slicing the lists | ||||
Augie Fackler
|
r43346 | equal_a = compare_range( | ||
self.a, ia, amatch, self.base, iz, zmatch | ||||
) | ||||
equal_b = compare_range( | ||||
self.b, ib, bmatch, self.base, iz, zmatch | ||||
) | ||||
same = compare_range(self.a, ia, amatch, self.b, ib, bmatch) | ||||
Matt Mackall
|
r6002 | |||
if same: | ||||
Augie Fackler
|
r43347 | yield b'same', ia, amatch | ||
Matt Mackall
|
r6002 | elif equal_a and not equal_b: | ||
Augie Fackler
|
r43347 | yield b'b', ib, bmatch | ||
Matt Mackall
|
r6002 | elif equal_b and not equal_a: | ||
Augie Fackler
|
r43347 | yield b'a', ia, amatch | ||
Matt Mackall
|
r6002 | elif not equal_a and not equal_b: | ||
Augie Fackler
|
r43347 | yield b'conflict', iz, zmatch, ia, amatch, ib, bmatch | ||
Matt Mackall
|
r6002 | else: | ||
Augie Fackler
|
r43347 | raise AssertionError(b"can't handle a=b=base but unmatched") | ||
Matt Mackall
|
r6002 | |||
ia = amatch | ||||
ib = bmatch | ||||
iz = zmatch | ||||
# if the same part of the base was deleted on both sides | ||||
# that's OK, we can just skip it. | ||||
if matchlen > 0: | ||||
assert ia == amatch | ||||
assert ib == bmatch | ||||
assert iz == zmatch | ||||
Augie Fackler
|
r43347 | yield b'unchanged', zmatch, zend | ||
Matt Mackall
|
r6002 | iz = zend | ||
ia = aend | ||||
ib = bend | ||||
Ryan McElroy
|
r28071 | def minimize(self, merge_regions): | ||
"""Trim conflict regions of lines where A and B sides match. | ||||
Mads Kiilerich
|
r30332 | Lines where both A and B have made the same changes at the beginning | ||
Ryan McElroy
|
r28071 | or the end of each merge region are eliminated from the conflict | ||
region and are instead considered the same. | ||||
""" | ||||
for region in merge_regions: | ||||
Augie Fackler
|
r43347 | if region[0] != b"conflict": | ||
Ryan McElroy
|
r28071 | yield region | ||
continue | ||||
Augie Fackler
|
r43787 | # pytype thinks this tuple contains only 3 things, but | ||
# that's clearly not true because this code successfully | ||||
# executes. It might be wise to rework merge_regions to be | ||||
# some kind of attrs type. | ||||
( | ||||
issue, | ||||
z1, | ||||
z2, | ||||
a1, | ||||
a2, | ||||
b1, | ||||
b2, | ||||
) = region # pytype: disable=bad-unpacking | ||||
Ryan McElroy
|
r28071 | alen = a2 - a1 | ||
blen = b2 - b1 | ||||
# find matches at the front | ||||
ii = 0 | ||||
Augie Fackler
|
r43346 | while ( | ||
ii < alen and ii < blen and self.a[a1 + ii] == self.b[b1 + ii] | ||||
): | ||||
Ryan McElroy
|
r28071 | ii += 1 | ||
startmatches = ii | ||||
# find matches at the end | ||||
ii = 0 | ||||
Augie Fackler
|
r43346 | while ( | ||
ii < alen | ||||
and ii < blen | ||||
and self.a[a2 - ii - 1] == self.b[b2 - ii - 1] | ||||
): | ||||
Ryan McElroy
|
r28071 | ii += 1 | ||
endmatches = ii | ||||
if startmatches > 0: | ||||
Augie Fackler
|
r43347 | yield b'same', a1, a1 + startmatches | ||
Ryan McElroy
|
r28071 | |||
Augie Fackler
|
r43346 | yield ( | ||
Augie Fackler
|
r43347 | b'conflict', | ||
Augie Fackler
|
r43346 | z1, | ||
z2, | ||||
a1 + startmatches, | ||||
a2 - endmatches, | ||||
b1 + startmatches, | ||||
b2 - endmatches, | ||||
) | ||||
Ryan McElroy
|
r28071 | |||
if endmatches > 0: | ||||
Augie Fackler
|
r43347 | yield b'same', a2 - endmatches, a2 | ||
Ryan McElroy
|
r28071 | |||
Matt Mackall
|
r6002 | def find_sync_regions(self): | ||
Matt Mackall
|
r14549 | """Return a list of sync regions, where both descendants match the base. | ||
Matt Mackall
|
r6002 | |||
Generates a list of (base1, base2, a1, a2, b1, b2). There is | ||||
always a zero-length sync region at the end of all the files. | ||||
""" | ||||
ia = ib = 0 | ||||
amatches = mdiff.get_matching_blocks(self.basetext, self.atext) | ||||
bmatches = mdiff.get_matching_blocks(self.basetext, self.btext) | ||||
len_a = len(amatches) | ||||
len_b = len(bmatches) | ||||
sl = [] | ||||
while ia < len_a and ib < len_b: | ||||
abase, amatch, alen = amatches[ia] | ||||
bbase, bmatch, blen = bmatches[ib] | ||||
# there is an unconflicted block at i; how long does it | ||||
# extend? until whichever one ends earlier. | ||||
Matt Mackall
|
r10282 | i = intersect((abase, abase + alen), (bbase, bbase + blen)) | ||
Matt Mackall
|
r6002 | if i: | ||
intbase = i[0] | ||||
intend = i[1] | ||||
intlen = intend - intbase | ||||
# found a match of base[i[0], i[1]]; this may be less than | ||||
# the region that matches in either one | ||||
assert intlen <= alen | ||||
assert intlen <= blen | ||||
assert abase <= intbase | ||||
assert bbase <= intbase | ||||
asub = amatch + (intbase - abase) | ||||
bsub = bmatch + (intbase - bbase) | ||||
aend = asub + intlen | ||||
bend = bsub + intlen | ||||
Augie Fackler
|
r41925 | assert self.base[intbase:intend] == self.a[asub:aend], ( | ||
Augie Fackler
|
r43346 | self.base[intbase:intend], | ||
self.a[asub:aend], | ||||
) | ||||
Matt Mackall
|
r6002 | |||
assert self.base[intbase:intend] == self.b[bsub:bend] | ||||
Augie Fackler
|
r43346 | sl.append((intbase, intend, asub, aend, bsub, bend)) | ||
Matt Mackall
|
r6002 | |||
# advance whichever one ends first in the base text | ||||
if (abase + alen) < (bbase + blen): | ||||
ia += 1 | ||||
else: | ||||
ib += 1 | ||||
intbase = len(self.base) | ||||
abase = len(self.a) | ||||
bbase = len(self.b) | ||||
sl.append((intbase, intbase, abase, abase, bbase, bbase)) | ||||
return sl | ||||
def find_unconflicted(self): | ||||
"""Return a list of ranges in base that are not conflicted.""" | ||||
am = mdiff.get_matching_blocks(self.basetext, self.atext) | ||||
bm = mdiff.get_matching_blocks(self.basetext, self.btext) | ||||
unc = [] | ||||
while am and bm: | ||||
# there is an unconflicted block at i; how long does it | ||||
# extend? until whichever one ends earlier. | ||||
a1 = am[0][0] | ||||
a2 = a1 + am[0][2] | ||||
b1 = bm[0][0] | ||||
b2 = b1 + bm[0][2] | ||||
i = intersect((a1, a2), (b1, b2)) | ||||
if i: | ||||
unc.append(i) | ||||
if a2 < b2: | ||||
del am[0] | ||||
else: | ||||
del bm[0] | ||||
return unc | ||||
Augie Fackler
|
r43346 | |||
Phil Cohen
|
r33824 | def _verifytext(text, path, ui, opts): | ||
"""verifies that text is non-binary (unless opts[text] is passed, | ||||
then we just warn)""" | ||||
Yuya Nishihara
|
r37102 | if stringutil.binary(text): | ||
Augie Fackler
|
r43347 | msg = _(b"%s looks like a binary file.") % path | ||
if not opts.get(b'quiet'): | ||||
ui.warn(_(b'warning: %s\n') % msg) | ||||
if not opts.get(b'text'): | ||||
Phil Cohen
|
r33824 | raise error.Abort(msg) | ||
return text | ||||
Augie Fackler
|
r43346 | |||
Phil Cohen
|
r33829 | def _picklabels(defaults, overrides): | ||
if len(overrides) > 3: | ||||
Augie Fackler
|
r43347 | raise error.Abort(_(b"can only specify three labels.")) | ||
Phil Cohen
|
r33934 | result = defaults[:] | ||
for i, override in enumerate(overrides): | ||||
result[i] = override | ||||
return result | ||||
Phil Cohen
|
r33829 | |||
Augie Fackler
|
r43346 | |||
Phil Cohen
|
r34051 | def simplemerge(ui, localctx, basectx, otherctx, **opts): | ||
Phil Cohen
|
r33827 | """Performs the simplemerge algorithm. | ||
Phil Cohen
|
r33908 | The merged result is written into `localctx`. | ||
Phil Cohen
|
r33906 | """ | ||
Pulkit Goyal
|
r35369 | opts = pycompat.byteskwargs(opts) | ||
Phil Cohen
|
r33827 | def readctx(ctx): | ||
Phil Cohen
|
r33903 | # Merges were always run in the working copy before, which means | ||
# they used decoded data, if the user defined any repository | ||||
# filters. | ||||
# | ||||
# Maintain that behavior today for BC, though perhaps in the future | ||||
# it'd be worth considering whether merging encoded data (what the | ||||
# repository usually sees) might be more useful. | ||||
return _verifytext(ctx.decodeddata(), ctx.path(), ui, opts) | ||||
Phil Cohen
|
r33827 | |||
Augie Fackler
|
r43347 | mode = opts.get(b'mode', b'merge') | ||
Phil Cohen
|
r33829 | name_a, name_b, name_base = None, None, None | ||
Augie Fackler
|
r43347 | if mode != b'union': | ||
Augie Fackler
|
r43346 | name_a, name_b, name_base = _picklabels( | ||
Augie Fackler
|
r43347 | [localctx.path(), otherctx.path(), None], opts.get(b'label', []) | ||
Augie Fackler
|
r43346 | ) | ||
Matt Mackall
|
r6002 | |||
Steve Borho
|
r14328 | try: | ||
Phil Cohen
|
r33906 | localtext = readctx(localctx) | ||
basetext = readctx(basectx) | ||||
othertext = readctx(otherctx) | ||||
Pierre-Yves David
|
r26587 | except error.Abort: | ||
Steve Borho
|
r14328 | return 1 | ||
Matt Mackall
|
r6002 | |||
m3 = Merge3Text(basetext, localtext, othertext) | ||||
Ryan McElroy
|
r28072 | extrakwargs = { | ||
Augie Fackler
|
r43347 | b"localorother": opts.get(b"localorother", None), | ||
b'minimize': True, | ||||
Augie Fackler
|
r43346 | } | ||
Augie Fackler
|
r43347 | if mode == b'union': | ||
extrakwargs[b'start_marker'] = None | ||||
extrakwargs[b'mid_marker'] = None | ||||
extrakwargs[b'end_marker'] = None | ||||
Erik Huelsmann
|
r26069 | elif name_base is not None: | ||
Augie Fackler
|
r43347 | extrakwargs[b'base_marker'] = b'|||||||' | ||
extrakwargs[b'name_base'] = name_base | ||||
extrakwargs[b'minimize'] = False | ||||
Phil Cohen
|
r33909 | |||
Augie Fackler
|
r43347 | mergedtext = b"" | ||
Augie Fackler
|
r43346 | for line in m3.merge_lines( | ||
name_a=name_a, name_b=name_b, **pycompat.strkwargs(extrakwargs) | ||||
): | ||||
Augie Fackler
|
r43347 | if opts.get(b'print'): | ||
Phil Cohen
|
r33909 | ui.fout.write(line) | ||
else: | ||||
mergedtext += line | ||||
Matt Mackall
|
r6002 | |||
Augie Fackler
|
r43347 | if not opts.get(b'print'): | ||
Phil Cohen
|
r33909 | localctx.write(mergedtext, localctx.flags()) | ||
Matt Mackall
|
r6002 | |||
Augie Fackler
|
r43347 | if m3.conflicts and not mode == b'union': | ||
Matt Mackall
|
r6002 | return 1 | ||