##// END OF EJS Templates
bdiff: don't check border condition in loop...
bdiff: don't check border condition in loop `plast = a + len - 1`. So, this "for" loop iterates from "a" to "plast", inclusive. So, `p == plast` can only be true on the final iteration of the loop. So checking for it on every loop iteration is wasteful. This patch simply decreases the upper bound of the loop by 1 and adds an explicit check after iteration for the `p == plast` case. We can't simply add 1 to the initial value for "i" because that doesn't do the correct thing on empty input strings. `perfbdiff -m 3041e4d59df2` on the Firefox repo becomes significantly faster: ! wall 0.072763 comb 0.070000 user 0.070000 sys 0.000000 (best of 100) ! wall 0.053221 comb 0.060000 user 0.060000 sys 0.000000 (best of 100) For the curious, this code has its origins in 8b067bde6679, which is the changeset that introduced bdiff.c in 2005. Also, GNU diffutils is able to perform a similar line-based diff in under 20ms. So there's likely more perf wins to be found in this code. One of them is the hashing algorithm. But it looks like mpm spent some time testing hash collisions in d0c48891dd4a. I'd like to do the same before switching away from lyhash, just to be on the safe side.

File last commit:

r28072:c3e9269d default
r30308:d500ddae default
Show More
simplemerge.py
466 lines | 15.0 KiB | text/x-python | PythonLexer
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 # Copyright (C) 2004, 2005 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
Martin Geisler
Remove FSF mailing address from GPL headers...
r15782 # along with this program; if not, see <http://www.gnu.org/licenses/>.
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002
# mbp: "you know that thing where cvs gives you conflict markers?"
# s: "i hate that."
Gregory Szorc
simplemerge: use absolute_import
r25974 from __future__ import absolute_import
import os
import sys
from .i18n import _
from . import (
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 error,
Gregory Szorc
simplemerge: use absolute_import
r25974 mdiff,
scmutil,
util,
)
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002
class CantReprocessAndShowBase(Exception):
pass
def intersect(ra, rb):
"""Given two ranges return the range where they intersect or None.
>>> intersect((0, 10), (0, 6))
(0, 6)
>>> intersect((0, 10), (5, 15))
(5, 10)
>>> intersect((0, 10), (10, 15))
>>> intersect((0, 9), (10, 15))
>>> intersect((0, 9), (7, 15))
(7, 9)
"""
assert ra[0] <= ra[1]
assert rb[0] <= rb[1]
sa = max(ra[0], rb[0])
sb = min(ra[1], rb[1])
if sa < sb:
return sa, sb
else:
return None
def compare_range(a, astart, aend, b, bstart, bend):
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
"""
Matt Mackall
many, many trivial check-code fixups
r10282 if (aend - astart) != (bend - bstart):
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 return False
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
if a[ia] != b[ib]:
return False
else:
return True
class Merge3Text(object):
"""3-way merge of texts.
Given strings BASE, OTHER, THIS, tries to produce a combined text
incorporating the changes from both BASE->OTHER and BASE->THIS."""
def __init__(self, basetext, atext, btext, base=None, a=None, b=None):
self.basetext = basetext
self.atext = atext
self.btext = btext
if base is None:
base = mdiff.splitnewlines(basetext)
if a is None:
a = mdiff.splitnewlines(atext)
if b is None:
b = mdiff.splitnewlines(btext)
self.base = base
self.a = a
self.b = b
def merge_lines(self,
name_a=None,
name_b=None,
name_base=None,
start_marker='<<<<<<<',
mid_marker='=======',
end_marker='>>>>>>>',
Jordi Gutiérrez Hermoso
simplemerge: enable option to resolve conflicts one way...
r26223 base_marker=None,
Ryan McElroy
merge: minimize conflicts when common base is not shown (issue4447)...
r28072 localorother=None,
minimize=False):
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 """Return merge in cvs-like form.
"""
self.conflicts = False
newline = '\n'
if len(self.a) > 0:
if self.a[0].endswith('\r\n'):
newline = '\r\n'
elif self.a[0].endswith('\r'):
newline = '\r'
Erik Huelsmann
simplemerge: add 'mode' kwarg which - when 'union' - suppresses markers...
r26069 if name_a and start_marker:
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 start_marker = start_marker + ' ' + name_a
Erik Huelsmann
simplemerge: add 'mode' kwarg which - when 'union' - suppresses markers...
r26069 if name_b and end_marker:
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 end_marker = end_marker + ' ' + name_b
if name_base and base_marker:
base_marker = base_marker + ' ' + name_base
merge_regions = self.merge_regions()
Ryan McElroy
merge: minimize conflicts when common base is not shown (issue4447)...
r28072 if minimize:
merge_regions = self.minimize(merge_regions)
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 for t in merge_regions:
what = t[0]
if what == 'unchanged':
for i in range(t[1], t[2]):
yield self.base[i]
Matt Mackall
backout most of 4f8067c94729
r12401 elif what == 'a' or what == 'same':
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 for i in range(t[1], t[2]):
yield self.a[i]
elif what == 'b':
for i in range(t[1], t[2]):
yield self.b[i]
elif what == 'conflict':
Jordi Gutiérrez Hermoso
simplemerge: enable option to resolve conflicts one way...
r26223 if localorother == 'local':
for i in range(t[3], t[4]):
yield self.a[i]
elif localorother == 'other':
for i in range(t[5], t[6]):
yield self.b[i]
else:
self.conflicts = True
if start_marker is not None:
yield start_marker + newline
for i in range(t[3], t[4]):
yield self.a[i]
if base_marker is not None:
yield base_marker + newline
for i in range(t[1], t[2]):
yield self.base[i]
if mid_marker is not None:
yield mid_marker + newline
for i in range(t[5], t[6]):
yield self.b[i]
if end_marker is not None:
yield end_marker + newline
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 else:
raise ValueError(what)
def merge_groups(self):
"""Yield sequence of line groups. Each one is a tuple:
'unchanged', lines
Lines unchanged from base
'a', lines
Lines taken from a
'same', lines
Lines taken from a (and equal to b)
'b', lines
Lines taken from b
'conflict', base_lines, a_lines, b_lines
Lines from base were changed to either a or b and conflict.
"""
for t in self.merge_regions():
what = t[0]
if what == 'unchanged':
yield what, self.base[t[1]:t[2]]
Matt Mackall
backout most of 4f8067c94729
r12401 elif what == 'a' or what == 'same':
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 yield what, self.a[t[1]:t[2]]
elif what == 'b':
yield what, self.b[t[1]:t[2]]
elif what == 'conflict':
yield (what,
self.base[t[1]:t[2]],
self.a[t[3]:t[4]],
self.b[t[5]:t[6]])
else:
raise ValueError(what)
def merge_regions(self):
"""Return sequences of matching and conflicting regions.
This returns tuples, where the first value says what kind we
have:
'unchanged', start, end
Take a region of base[start:end]
'same', astart, aend
b and a are different from base but give the same result
'a', start, end
Non-clashing insertion from a[start:end]
Ryan McElroy
merge: add some useful documentation
r28070 'conflict', zstart, zend, astart, aend, bstart, bend
Conflict between a and b, with z as common ancestor
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 Method is as follows:
The two sequences align only on regions which match the base
Matt Mackall
check-code: catch misspellings of descendant...
r14549 and both descendants. These are found by doing a two-way diff
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 of each one against the base, and then finding the
intersections between those regions. These "sync regions"
are by definition unchanged in both and easily dealt with.
The regions in between can be in any of three cases:
conflicted, or changed on only one side.
"""
# section a[0:ia] has been disposed of, etc
iz = ia = ib = 0
Brodie Rao
cleanup: eradicate long lines
r16683 for region in self.find_sync_regions():
zmatch, zend, amatch, aend, bmatch, bend = region
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 #print 'match base [%d:%d]' % (zmatch, zend)
matchlen = zend - zmatch
assert matchlen >= 0
assert matchlen == (aend - amatch)
assert matchlen == (bend - bmatch)
len_a = amatch - ia
len_b = bmatch - ib
len_base = zmatch - iz
assert len_a >= 0
assert len_b >= 0
assert len_base >= 0
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
if len_a or len_b:
# try to avoid actually slicing the lists
equal_a = compare_range(self.a, ia, amatch,
self.base, iz, zmatch)
equal_b = compare_range(self.b, ib, bmatch,
self.base, iz, zmatch)
same = compare_range(self.a, ia, amatch,
self.b, ib, bmatch)
if same:
yield 'same', ia, amatch
elif equal_a and not equal_b:
yield 'b', ib, bmatch
elif equal_b and not equal_a:
yield 'a', ia, amatch
elif not equal_a and not equal_b:
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
else:
raise AssertionError("can't handle a=b=base but unmatched")
ia = amatch
ib = bmatch
iz = zmatch
# if the same part of the base was deleted on both sides
# that's OK, we can just skip it.
if matchlen > 0:
assert ia == amatch
assert ib == bmatch
assert iz == zmatch
yield 'unchanged', zmatch, zend
iz = zend
ia = aend
ib = bend
Ryan McElroy
merge: introduce method to minimize merge regions...
r28071 def minimize(self, merge_regions):
"""Trim conflict regions of lines where A and B sides match.
Lines where both A and B have made the same changes at the begining
or the end of each merge region are eliminated from the conflict
region and are instead considered the same.
"""
for region in merge_regions:
if region[0] != "conflict":
yield region
continue
issue, z1, z2, a1, a2, b1, b2 = region
alen = a2 - a1
blen = b2 - b1
# find matches at the front
ii = 0
while ii < alen and ii < blen and \
self.a[a1 + ii] == self.b[b1 + ii]:
ii += 1
startmatches = ii
# find matches at the end
ii = 0
while ii < alen and ii < blen and \
self.a[a2 - ii - 1] == self.b[b2 - ii - 1]:
ii += 1
endmatches = ii
if startmatches > 0:
yield 'same', a1, a1 + startmatches
yield ('conflict', z1, z2,
a1 + startmatches, a2 - endmatches,
b1 + startmatches, b2 - endmatches)
if endmatches > 0:
yield 'same', a2 - endmatches, a2
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 def find_sync_regions(self):
Matt Mackall
check-code: catch misspellings of descendant...
r14549 """Return a list of sync regions, where both descendants match the base.
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002
Generates a list of (base1, base2, a1, a2, b1, b2). There is
always a zero-length sync region at the end of all the files.
"""
ia = ib = 0
amatches = mdiff.get_matching_blocks(self.basetext, self.atext)
bmatches = mdiff.get_matching_blocks(self.basetext, self.btext)
len_a = len(amatches)
len_b = len(bmatches)
sl = []
while ia < len_a and ib < len_b:
abase, amatch, alen = amatches[ia]
bbase, bmatch, blen = bmatches[ib]
# there is an unconflicted block at i; how long does it
# extend? until whichever one ends earlier.
Matt Mackall
many, many trivial check-code fixups
r10282 i = intersect((abase, abase + alen), (bbase, bbase + blen))
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 if i:
intbase = i[0]
intend = i[1]
intlen = intend - intbase
# found a match of base[i[0], i[1]]; this may be less than
# the region that matches in either one
assert intlen <= alen
assert intlen <= blen
assert abase <= intbase
assert bbase <= intbase
asub = amatch + (intbase - abase)
bsub = bmatch + (intbase - bbase)
aend = asub + intlen
bend = bsub + intlen
assert self.base[intbase:intend] == self.a[asub:aend], \
(self.base[intbase:intend], self.a[asub:aend])
assert self.base[intbase:intend] == self.b[bsub:bend]
sl.append((intbase, intend,
asub, aend,
bsub, bend))
# advance whichever one ends first in the base text
if (abase + alen) < (bbase + blen):
ia += 1
else:
ib += 1
intbase = len(self.base)
abase = len(self.a)
bbase = len(self.b)
sl.append((intbase, intbase, abase, abase, bbase, bbase))
return sl
def find_unconflicted(self):
"""Return a list of ranges in base that are not conflicted."""
am = mdiff.get_matching_blocks(self.basetext, self.atext)
bm = mdiff.get_matching_blocks(self.basetext, self.btext)
unc = []
while am and bm:
# there is an unconflicted block at i; how long does it
# extend? until whichever one ends earlier.
a1 = am[0][0]
a2 = a1 + am[0][2]
b1 = bm[0][0]
b2 = b1 + bm[0][2]
i = intersect((a1, a2), (b1, b2))
if i:
unc.append(i)
if a2 < b2:
del am[0]
else:
del bm[0]
return unc
Steve Borho
simplemerge: use ui.warn() for warnings
r8269 def simplemerge(ui, local, base, other, **opts):
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 def readfile(filename):
f = open(filename, "rb")
text = f.read()
f.close()
if util.binary(text):
msg = _("%s looks like a binary file.") % filename
Steve Borho
simplemerge: do not allow binary files to abort an entire merge...
r14328 if not opts.get('quiet'):
ui.warn(_('warning: %s\n') % msg)
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 if not opts.get('text'):
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 raise error.Abort(msg)
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 return text
Erik Huelsmann
simplemerge: add 'mode' kwarg which - when 'union' - suppresses markers...
r26069 mode = opts.get('mode','merge')
if mode == 'union':
name_a = None
name_b = None
name_base = None
else:
name_a = local
name_b = other
name_base = None
labels = opts.get('label', [])
if len(labels) > 0:
name_a = labels[0]
if len(labels) > 1:
name_b = labels[1]
if len(labels) > 2:
name_base = labels[2]
if len(labels) > 3:
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 raise error.Abort(_("can only specify three labels."))
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002
Steve Borho
simplemerge: do not allow binary files to abort an entire merge...
r14328 try:
localtext = readfile(local)
basetext = readfile(base)
othertext = readfile(other)
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 except error.Abort:
Steve Borho
simplemerge: do not allow binary files to abort an entire merge...
r14328 return 1
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002
Matt Mackall
backout dbdb777502dc (issue3077) (issue3071)...
r15381 local = os.path.realpath(local)
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 if not opts.get('print'):
Adrian Buehlmann
move opener from util to scmutil
r13970 opener = scmutil.opener(os.path.dirname(local))
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 out = opener(os.path.basename(local), "w", atomictemp=True)
else:
out = sys.stdout
m3 = Merge3Text(basetext, localtext, othertext)
Ryan McElroy
merge: minimize conflicts when common base is not shown (issue4447)...
r28072 extrakwargs = {
"localorother": opts.get("localorother", None),
'minimize': True,
}
Erik Huelsmann
simplemerge: add 'mode' kwarg which - when 'union' - suppresses markers...
r26069 if mode == 'union':
extrakwargs['start_marker'] = None
extrakwargs['mid_marker'] = None
extrakwargs['end_marker'] = None
elif name_base is not None:
Pierre-Yves David
simplemerge: support three labels when merging...
r22024 extrakwargs['base_marker'] = '|||||||'
extrakwargs['name_base'] = name_base
Ryan McElroy
merge: minimize conflicts when common base is not shown (issue4447)...
r28072 extrakwargs['minimize'] = False
Pierre-Yves David
simplemerge: support three labels when merging...
r22024 for line in m3.merge_lines(name_a=name_a, name_b=name_b, **extrakwargs):
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 out.write(line)
if not opts.get('print'):
Greg Ward
atomictempfile: make close() consistent with other file-like objects....
r15057 out.close()
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002
Erik Huelsmann
simplemerge: add 'mode' kwarg which - when 'union' - suppresses markers...
r26069 if m3.conflicts and not mode == 'union':
Matt Mackall
merge: move the bulk of simplemerge into core...
r6002 return 1