##// END OF EJS Templates
simplemerge: rewrite `merge_lines()` using `merge_groups()`...
Martin von Zweigbergk -
r49380:88a45330 default
parent child Browse files
Show More
@@ -1,552 +1,527 b''
1 1 # Copyright (C) 2004, 2005 Canonical Ltd
2 2 #
3 3 # This program is free software; you can redistribute it and/or modify
4 4 # it under the terms of the GNU General Public License as published by
5 5 # the Free Software Foundation; either version 2 of the License, or
6 6 # (at your option) any later version.
7 7 #
8 8 # This program is distributed in the hope that it will be useful,
9 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 11 # GNU General Public License for more details.
12 12 #
13 13 # You should have received a copy of the GNU General Public License
14 14 # along with this program; if not, see <http://www.gnu.org/licenses/>.
15 15
16 16 # mbp: "you know that thing where cvs gives you conflict markers?"
17 17 # s: "i hate that."
18 18
19 19 from __future__ import absolute_import
20 20
21 21 from .i18n import _
22 22 from . import (
23 23 error,
24 24 mdiff,
25 25 pycompat,
26 26 )
27 27 from .utils import stringutil
28 28
29 29
30 30 class CantReprocessAndShowBase(Exception):
31 31 pass
32 32
33 33
34 34 def intersect(ra, rb):
35 35 """Given two ranges return the range where they intersect or None.
36 36
37 37 >>> intersect((0, 10), (0, 6))
38 38 (0, 6)
39 39 >>> intersect((0, 10), (5, 15))
40 40 (5, 10)
41 41 >>> intersect((0, 10), (10, 15))
42 42 >>> intersect((0, 9), (10, 15))
43 43 >>> intersect((0, 9), (7, 15))
44 44 (7, 9)
45 45 """
46 46 assert ra[0] <= ra[1]
47 47 assert rb[0] <= rb[1]
48 48
49 49 sa = max(ra[0], rb[0])
50 50 sb = min(ra[1], rb[1])
51 51 if sa < sb:
52 52 return sa, sb
53 53 else:
54 54 return None
55 55
56 56
57 57 def compare_range(a, astart, aend, b, bstart, bend):
58 58 """Compare a[astart:aend] == b[bstart:bend], without slicing."""
59 59 if (aend - astart) != (bend - bstart):
60 60 return False
61 61 for ia, ib in zip(
62 62 pycompat.xrange(astart, aend), pycompat.xrange(bstart, bend)
63 63 ):
64 64 if a[ia] != b[ib]:
65 65 return False
66 66 else:
67 67 return True
68 68
69 69
70 70 class Merge3Text(object):
71 71 """3-way merge of texts.
72 72
73 73 Given strings BASE, OTHER, THIS, tries to produce a combined text
74 74 incorporating the changes from both BASE->OTHER and BASE->THIS."""
75 75
76 76 def __init__(self, basetext, atext, btext, base=None, a=None, b=None):
77 77 self.basetext = basetext
78 78 self.atext = atext
79 79 self.btext = btext
80 80 if base is None:
81 81 base = mdiff.splitnewlines(basetext)
82 82 if a is None:
83 83 a = mdiff.splitnewlines(atext)
84 84 if b is None:
85 85 b = mdiff.splitnewlines(btext)
86 86 self.base = base
87 87 self.a = a
88 88 self.b = b
89 89
90 90 def merge_lines(
91 91 self,
92 92 name_a=None,
93 93 name_b=None,
94 94 name_base=None,
95 95 start_marker=b'<<<<<<<',
96 96 mid_marker=b'=======',
97 97 end_marker=b'>>>>>>>',
98 98 base_marker=None,
99 99 minimize=False,
100 100 ):
101 101 """Return merge in cvs-like form."""
102 102 self.conflicts = False
103 103 newline = b'\n'
104 104 if len(self.a) > 0:
105 105 if self.a[0].endswith(b'\r\n'):
106 106 newline = b'\r\n'
107 107 elif self.a[0].endswith(b'\r'):
108 108 newline = b'\r'
109 109 if name_a and start_marker:
110 110 start_marker = start_marker + b' ' + name_a
111 111 if name_b and end_marker:
112 112 end_marker = end_marker + b' ' + name_b
113 113 if name_base and base_marker:
114 114 base_marker = base_marker + b' ' + name_base
115 merge_regions = self.merge_regions()
115 merge_groups = self.merge_groups()
116 116 if minimize:
117 merge_regions = self.minimize(merge_regions)
118 for t in merge_regions:
119 what = t[0]
120 if what == b'unchanged':
121 for i in range(t[1], t[2]):
122 yield self.base[i]
123 elif what == b'a' or what == b'same':
124 for i in range(t[1], t[2]):
125 yield self.a[i]
126 elif what == b'b':
127 for i in range(t[1], t[2]):
128 yield self.b[i]
129 elif what == b'conflict':
117 merge_groups = self.minimize(merge_groups)
118 for what, lines in merge_groups:
119 if what == b'conflict':
120 base_lines, a_lines, b_lines = lines
130 121 self.conflicts = True
131 122 if start_marker is not None:
132 123 yield start_marker + newline
133 for i in range(t[3], t[4]):
134 yield self.a[i]
124 for line in a_lines:
125 yield line
135 126 if base_marker is not None:
136 127 yield base_marker + newline
137 for i in range(t[1], t[2]):
138 yield self.base[i]
128 for line in base_lines:
129 yield line
139 130 if mid_marker is not None:
140 131 yield mid_marker + newline
141 for i in range(t[5], t[6]):
142 yield self.b[i]
132 for line in b_lines:
133 yield line
143 134 if end_marker is not None:
144 135 yield end_marker + newline
145 136 else:
146 raise ValueError(what)
137 for line in lines:
138 yield line
147 139
148 140 def merge_groups(self):
149 141 """Yield sequence of line groups. Each one is a tuple:
150 142
151 143 'unchanged', lines
152 144 Lines unchanged from base
153 145
154 146 'a', lines
155 147 Lines taken from a
156 148
157 149 'same', lines
158 150 Lines taken from a (and equal to b)
159 151
160 152 'b', lines
161 153 Lines taken from b
162 154
163 155 'conflict', (base_lines, a_lines, b_lines)
164 156 Lines from base were changed to either a or b and conflict.
165 157 """
166 158 for t in self.merge_regions():
167 159 what = t[0]
168 160 if what == b'unchanged':
169 161 yield what, self.base[t[1] : t[2]]
170 162 elif what == b'a' or what == b'same':
171 163 yield what, self.a[t[1] : t[2]]
172 164 elif what == b'b':
173 165 yield what, self.b[t[1] : t[2]]
174 166 elif what == b'conflict':
175 167 yield (
176 168 what,
177 169 (
178 170 self.base[t[1] : t[2]],
179 171 self.a[t[3] : t[4]],
180 172 self.b[t[5] : t[6]],
181 173 ),
182 174 )
183 175 else:
184 176 raise ValueError(what)
185 177
186 178 def merge_regions(self):
187 179 """Return sequences of matching and conflicting regions.
188 180
189 181 This returns tuples, where the first value says what kind we
190 182 have:
191 183
192 184 'unchanged', start, end
193 185 Take a region of base[start:end]
194 186
195 187 'same', astart, aend
196 188 b and a are different from base but give the same result
197 189
198 190 'a', start, end
199 191 Non-clashing insertion from a[start:end]
200 192
201 193 'conflict', zstart, zend, astart, aend, bstart, bend
202 194 Conflict between a and b, with z as common ancestor
203 195
204 196 Method is as follows:
205 197
206 198 The two sequences align only on regions which match the base
207 199 and both descendants. These are found by doing a two-way diff
208 200 of each one against the base, and then finding the
209 201 intersections between those regions. These "sync regions"
210 202 are by definition unchanged in both and easily dealt with.
211 203
212 204 The regions in between can be in any of three cases:
213 205 conflicted, or changed on only one side.
214 206 """
215 207
216 208 # section a[0:ia] has been disposed of, etc
217 209 iz = ia = ib = 0
218 210
219 211 for region in self.find_sync_regions():
220 212 zmatch, zend, amatch, aend, bmatch, bend = region
221 213 # print 'match base [%d:%d]' % (zmatch, zend)
222 214
223 215 matchlen = zend - zmatch
224 216 assert matchlen >= 0
225 217 assert matchlen == (aend - amatch)
226 218 assert matchlen == (bend - bmatch)
227 219
228 220 len_a = amatch - ia
229 221 len_b = bmatch - ib
230 222 len_base = zmatch - iz
231 223 assert len_a >= 0
232 224 assert len_b >= 0
233 225 assert len_base >= 0
234 226
235 227 # print 'unmatched a=%d, b=%d' % (len_a, len_b)
236 228
237 229 if len_a or len_b:
238 230 # try to avoid actually slicing the lists
239 231 equal_a = compare_range(
240 232 self.a, ia, amatch, self.base, iz, zmatch
241 233 )
242 234 equal_b = compare_range(
243 235 self.b, ib, bmatch, self.base, iz, zmatch
244 236 )
245 237 same = compare_range(self.a, ia, amatch, self.b, ib, bmatch)
246 238
247 239 if same:
248 240 yield b'same', ia, amatch
249 241 elif equal_a and not equal_b:
250 242 yield b'b', ib, bmatch
251 243 elif equal_b and not equal_a:
252 244 yield b'a', ia, amatch
253 245 elif not equal_a and not equal_b:
254 246 yield b'conflict', iz, zmatch, ia, amatch, ib, bmatch
255 247 else:
256 248 raise AssertionError(b"can't handle a=b=base but unmatched")
257 249
258 250 ia = amatch
259 251 ib = bmatch
260 252 iz = zmatch
261 253
262 254 # if the same part of the base was deleted on both sides
263 255 # that's OK, we can just skip it.
264 256
265 257 if matchlen > 0:
266 258 assert ia == amatch
267 259 assert ib == bmatch
268 260 assert iz == zmatch
269 261
270 262 yield b'unchanged', zmatch, zend
271 263 iz = zend
272 264 ia = aend
273 265 ib = bend
274 266
275 def minimize(self, merge_regions):
267 def minimize(self, merge_groups):
276 268 """Trim conflict regions of lines where A and B sides match.
277 269
278 270 Lines where both A and B have made the same changes at the beginning
279 271 or the end of each merge region are eliminated from the conflict
280 272 region and are instead considered the same.
281 273 """
282 for region in merge_regions:
283 if region[0] != b"conflict":
284 yield region
274 for what, lines in merge_groups:
275 if what != b"conflict":
276 yield what, lines
285 277 continue
286 # pytype thinks this tuple contains only 3 things, but
287 # that's clearly not true because this code successfully
288 # executes. It might be wise to rework merge_regions to be
289 # some kind of attrs type.
290 (
291 issue,
292 z1,
293 z2,
294 a1,
295 a2,
296 b1,
297 b2,
298 ) = region # pytype: disable=bad-unpacking
299 alen = a2 - a1
300 blen = b2 - b1
278 base_lines, a_lines, b_lines = lines
279 alen = len(a_lines)
280 blen = len(b_lines)
301 281
302 282 # find matches at the front
303 283 ii = 0
304 while (
305 ii < alen and ii < blen and self.a[a1 + ii] == self.b[b1 + ii]
306 ):
284 while ii < alen and ii < blen and a_lines[ii] == b_lines[ii]:
307 285 ii += 1
308 286 startmatches = ii
309 287
310 288 # find matches at the end
311 289 ii = 0
312 290 while (
313 ii < alen
314 and ii < blen
315 and self.a[a2 - ii - 1] == self.b[b2 - ii - 1]
291 ii < alen and ii < blen and a_lines[-ii - 1] == b_lines[-ii - 1]
316 292 ):
317 293 ii += 1
318 294 endmatches = ii
319 295
320 296 if startmatches > 0:
321 yield b'same', a1, a1 + startmatches
297 yield b'same', a_lines[:startmatches]
322 298
323 299 yield (
324 300 b'conflict',
325 z1,
326 z2,
327 a1 + startmatches,
328 a2 - endmatches,
329 b1 + startmatches,
330 b2 - endmatches,
301 (
302 base_lines,
303 a_lines[startmatches : alen - endmatches],
304 b_lines[startmatches : blen - endmatches],
305 ),
331 306 )
332 307
333 308 if endmatches > 0:
334 yield b'same', a2 - endmatches, a2
309 yield b'same', a_lines[alen - endmatches :]
335 310
336 311 def find_sync_regions(self):
337 312 """Return a list of sync regions, where both descendants match the base.
338 313
339 314 Generates a list of (base1, base2, a1, a2, b1, b2). There is
340 315 always a zero-length sync region at the end of all the files.
341 316 """
342 317
343 318 ia = ib = 0
344 319 amatches = mdiff.get_matching_blocks(self.basetext, self.atext)
345 320 bmatches = mdiff.get_matching_blocks(self.basetext, self.btext)
346 321 len_a = len(amatches)
347 322 len_b = len(bmatches)
348 323
349 324 sl = []
350 325
351 326 while ia < len_a and ib < len_b:
352 327 abase, amatch, alen = amatches[ia]
353 328 bbase, bmatch, blen = bmatches[ib]
354 329
355 330 # there is an unconflicted block at i; how long does it
356 331 # extend? until whichever one ends earlier.
357 332 i = intersect((abase, abase + alen), (bbase, bbase + blen))
358 333 if i:
359 334 intbase = i[0]
360 335 intend = i[1]
361 336 intlen = intend - intbase
362 337
363 338 # found a match of base[i[0], i[1]]; this may be less than
364 339 # the region that matches in either one
365 340 assert intlen <= alen
366 341 assert intlen <= blen
367 342 assert abase <= intbase
368 343 assert bbase <= intbase
369 344
370 345 asub = amatch + (intbase - abase)
371 346 bsub = bmatch + (intbase - bbase)
372 347 aend = asub + intlen
373 348 bend = bsub + intlen
374 349
375 350 assert self.base[intbase:intend] == self.a[asub:aend], (
376 351 self.base[intbase:intend],
377 352 self.a[asub:aend],
378 353 )
379 354
380 355 assert self.base[intbase:intend] == self.b[bsub:bend]
381 356
382 357 sl.append((intbase, intend, asub, aend, bsub, bend))
383 358
384 359 # advance whichever one ends first in the base text
385 360 if (abase + alen) < (bbase + blen):
386 361 ia += 1
387 362 else:
388 363 ib += 1
389 364
390 365 intbase = len(self.base)
391 366 abase = len(self.a)
392 367 bbase = len(self.b)
393 368 sl.append((intbase, intbase, abase, abase, bbase, bbase))
394 369
395 370 return sl
396 371
397 372
398 373 def _verifytext(text, path, ui, opts):
399 374 """verifies that text is non-binary (unless opts[text] is passed,
400 375 then we just warn)"""
401 376 if stringutil.binary(text):
402 377 msg = _(b"%s looks like a binary file.") % path
403 378 if not opts.get('quiet'):
404 379 ui.warn(_(b'warning: %s\n') % msg)
405 380 if not opts.get('text'):
406 381 raise error.Abort(msg)
407 382 return text
408 383
409 384
410 385 def _picklabels(defaults, overrides):
411 386 if len(overrides) > 3:
412 387 raise error.Abort(_(b"can only specify three labels."))
413 388 result = defaults[:]
414 389 for i, override in enumerate(overrides):
415 390 result[i] = override
416 391 return result
417 392
418 393
419 394 def _mergediff(m3, name_a, name_b, name_base):
420 395 lines = []
421 396 conflicts = False
422 397 for what, group_lines in m3.merge_groups():
423 398 if what == b'conflict':
424 399 base_lines, a_lines, b_lines = group_lines
425 400 base_text = b''.join(base_lines)
426 401 b_blocks = list(
427 402 mdiff.allblocks(
428 403 base_text,
429 404 b''.join(b_lines),
430 405 lines1=base_lines,
431 406 lines2=b_lines,
432 407 )
433 408 )
434 409 a_blocks = list(
435 410 mdiff.allblocks(
436 411 base_text,
437 412 b''.join(a_lines),
438 413 lines1=base_lines,
439 414 lines2=b_lines,
440 415 )
441 416 )
442 417
443 418 def matching_lines(blocks):
444 419 return sum(
445 420 block[1] - block[0]
446 421 for block, kind in blocks
447 422 if kind == b'='
448 423 )
449 424
450 425 def diff_lines(blocks, lines1, lines2):
451 426 for block, kind in blocks:
452 427 if kind == b'=':
453 428 for line in lines1[block[0] : block[1]]:
454 429 yield b' ' + line
455 430 else:
456 431 for line in lines1[block[0] : block[1]]:
457 432 yield b'-' + line
458 433 for line in lines2[block[2] : block[3]]:
459 434 yield b'+' + line
460 435
461 436 lines.append(b"<<<<<<<\n")
462 437 if matching_lines(a_blocks) < matching_lines(b_blocks):
463 438 lines.append(b"======= %s\n" % name_a)
464 439 lines.extend(a_lines)
465 440 lines.append(b"------- %s\n" % name_base)
466 441 lines.append(b"+++++++ %s\n" % name_b)
467 442 lines.extend(diff_lines(b_blocks, base_lines, b_lines))
468 443 else:
469 444 lines.append(b"------- %s\n" % name_base)
470 445 lines.append(b"+++++++ %s\n" % name_a)
471 446 lines.extend(diff_lines(a_blocks, base_lines, a_lines))
472 447 lines.append(b"======= %s\n" % name_b)
473 448 lines.extend(b_lines)
474 449 lines.append(b">>>>>>>\n")
475 450 conflicts = True
476 451 else:
477 452 lines.extend(group_lines)
478 453 return lines, conflicts
479 454
480 455
481 456 def _resolve(m3, sides):
482 457 lines = []
483 458 for what, group_lines in m3.merge_groups():
484 459 if what == b'conflict':
485 460 for side in sides:
486 461 lines.extend(group_lines[side])
487 462 else:
488 463 lines.extend(group_lines)
489 464 return lines
490 465
491 466
492 467 def simplemerge(ui, localctx, basectx, otherctx, **opts):
493 468 """Performs the simplemerge algorithm.
494 469
495 470 The merged result is written into `localctx`.
496 471 """
497 472
498 473 def readctx(ctx):
499 474 # Merges were always run in the working copy before, which means
500 475 # they used decoded data, if the user defined any repository
501 476 # filters.
502 477 #
503 478 # Maintain that behavior today for BC, though perhaps in the future
504 479 # it'd be worth considering whether merging encoded data (what the
505 480 # repository usually sees) might be more useful.
506 481 return _verifytext(ctx.decodeddata(), ctx.path(), ui, opts)
507 482
508 483 try:
509 484 localtext = readctx(localctx)
510 485 basetext = readctx(basectx)
511 486 othertext = readctx(otherctx)
512 487 except error.Abort:
513 488 return 1
514 489
515 490 m3 = Merge3Text(basetext, localtext, othertext)
516 491 conflicts = False
517 492 mode = opts.get('mode', b'merge')
518 493 if mode == b'union':
519 494 lines = _resolve(m3, (1, 2))
520 495 elif mode == b'local':
521 496 lines = _resolve(m3, (1,))
522 497 elif mode == b'other':
523 498 lines = _resolve(m3, (2,))
524 499 else:
525 500 name_a, name_b, name_base = _picklabels(
526 501 [localctx.path(), otherctx.path(), None], opts.get('label', [])
527 502 )
528 503 if mode == b'mergediff':
529 504 lines, conflicts = _mergediff(m3, name_a, name_b, name_base)
530 505 else:
531 506 extrakwargs = {
532 507 'minimize': True,
533 508 }
534 509 if name_base is not None:
535 510 extrakwargs['base_marker'] = b'|||||||'
536 511 extrakwargs['name_base'] = name_base
537 512 extrakwargs['minimize'] = False
538 513 lines = list(
539 514 m3.merge_lines(name_a=name_a, name_b=name_b, **extrakwargs)
540 515 )
541 516 conflicts = m3.conflicts
542 517
543 518 mergedtext = b''.join(lines)
544 519 if opts.get('print'):
545 520 ui.fout.write(mergedtext)
546 521 else:
547 522 # localctx.flags() already has the merged flags (done in
548 523 # mergestate.resolve())
549 524 localctx.write(mergedtext, localctx.flags())
550 525
551 526 if conflicts:
552 527 return 1
General Comments 0
You need to be logged in to leave comments. Login now