##// END OF EJS Templates
simplemerge: avoid quadratic concatenation when building output text...
Martin von Zweigbergk -
r46715:fd75e5c5 default
parent child Browse files
Show More
@@ -1,525 +1,526 b''
1 1 # Copyright (C) 2004, 2005 Canonical Ltd
2 2 #
3 3 # This program is free software; you can redistribute it and/or modify
4 4 # it under the terms of the GNU General Public License as published by
5 5 # the Free Software Foundation; either version 2 of the License, or
6 6 # (at your option) any later version.
7 7 #
8 8 # This program is distributed in the hope that it will be useful,
9 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 11 # GNU General Public License for more details.
12 12 #
13 13 # You should have received a copy of the GNU General Public License
14 14 # along with this program; if not, see <http://www.gnu.org/licenses/>.
15 15
16 16 # mbp: "you know that thing where cvs gives you conflict markers?"
17 17 # s: "i hate that."
18 18
19 19 from __future__ import absolute_import
20 20
21 21 from .i18n import _
22 22 from . import (
23 23 error,
24 24 mdiff,
25 25 node as nodemod,
26 26 pycompat,
27 27 util,
28 28 )
29 29 from .utils import stringutil
30 30
31 31
32 32 class CantReprocessAndShowBase(Exception):
33 33 pass
34 34
35 35
36 36 def intersect(ra, rb):
37 37 """Given two ranges return the range where they intersect or None.
38 38
39 39 >>> intersect((0, 10), (0, 6))
40 40 (0, 6)
41 41 >>> intersect((0, 10), (5, 15))
42 42 (5, 10)
43 43 >>> intersect((0, 10), (10, 15))
44 44 >>> intersect((0, 9), (10, 15))
45 45 >>> intersect((0, 9), (7, 15))
46 46 (7, 9)
47 47 """
48 48 assert ra[0] <= ra[1]
49 49 assert rb[0] <= rb[1]
50 50
51 51 sa = max(ra[0], rb[0])
52 52 sb = min(ra[1], rb[1])
53 53 if sa < sb:
54 54 return sa, sb
55 55 else:
56 56 return None
57 57
58 58
59 59 def compare_range(a, astart, aend, b, bstart, bend):
60 60 """Compare a[astart:aend] == b[bstart:bend], without slicing."""
61 61 if (aend - astart) != (bend - bstart):
62 62 return False
63 63 for ia, ib in zip(
64 64 pycompat.xrange(astart, aend), pycompat.xrange(bstart, bend)
65 65 ):
66 66 if a[ia] != b[ib]:
67 67 return False
68 68 else:
69 69 return True
70 70
71 71
72 72 class Merge3Text(object):
73 73 """3-way merge of texts.
74 74
75 75 Given strings BASE, OTHER, THIS, tries to produce a combined text
76 76 incorporating the changes from both BASE->OTHER and BASE->THIS."""
77 77
78 78 def __init__(self, basetext, atext, btext, base=None, a=None, b=None):
79 79 self.basetext = basetext
80 80 self.atext = atext
81 81 self.btext = btext
82 82 if base is None:
83 83 base = mdiff.splitnewlines(basetext)
84 84 if a is None:
85 85 a = mdiff.splitnewlines(atext)
86 86 if b is None:
87 87 b = mdiff.splitnewlines(btext)
88 88 self.base = base
89 89 self.a = a
90 90 self.b = b
91 91
92 92 def merge_lines(
93 93 self,
94 94 name_a=None,
95 95 name_b=None,
96 96 name_base=None,
97 97 start_marker=b'<<<<<<<',
98 98 mid_marker=b'=======',
99 99 end_marker=b'>>>>>>>',
100 100 base_marker=None,
101 101 localorother=None,
102 102 minimize=False,
103 103 ):
104 104 """Return merge in cvs-like form."""
105 105 self.conflicts = False
106 106 newline = b'\n'
107 107 if len(self.a) > 0:
108 108 if self.a[0].endswith(b'\r\n'):
109 109 newline = b'\r\n'
110 110 elif self.a[0].endswith(b'\r'):
111 111 newline = b'\r'
112 112 if name_a and start_marker:
113 113 start_marker = start_marker + b' ' + name_a
114 114 if name_b and end_marker:
115 115 end_marker = end_marker + b' ' + name_b
116 116 if name_base and base_marker:
117 117 base_marker = base_marker + b' ' + name_base
118 118 merge_regions = self.merge_regions()
119 119 if minimize:
120 120 merge_regions = self.minimize(merge_regions)
121 121 for t in merge_regions:
122 122 what = t[0]
123 123 if what == b'unchanged':
124 124 for i in range(t[1], t[2]):
125 125 yield self.base[i]
126 126 elif what == b'a' or what == b'same':
127 127 for i in range(t[1], t[2]):
128 128 yield self.a[i]
129 129 elif what == b'b':
130 130 for i in range(t[1], t[2]):
131 131 yield self.b[i]
132 132 elif what == b'conflict':
133 133 if localorother == b'local':
134 134 for i in range(t[3], t[4]):
135 135 yield self.a[i]
136 136 elif localorother == b'other':
137 137 for i in range(t[5], t[6]):
138 138 yield self.b[i]
139 139 else:
140 140 self.conflicts = True
141 141 if start_marker is not None:
142 142 yield start_marker + newline
143 143 for i in range(t[3], t[4]):
144 144 yield self.a[i]
145 145 if base_marker is not None:
146 146 yield base_marker + newline
147 147 for i in range(t[1], t[2]):
148 148 yield self.base[i]
149 149 if mid_marker is not None:
150 150 yield mid_marker + newline
151 151 for i in range(t[5], t[6]):
152 152 yield self.b[i]
153 153 if end_marker is not None:
154 154 yield end_marker + newline
155 155 else:
156 156 raise ValueError(what)
157 157
158 158 def merge_groups(self):
159 159 """Yield sequence of line groups. Each one is a tuple:
160 160
161 161 'unchanged', lines
162 162 Lines unchanged from base
163 163
164 164 'a', lines
165 165 Lines taken from a
166 166
167 167 'same', lines
168 168 Lines taken from a (and equal to b)
169 169
170 170 'b', lines
171 171 Lines taken from b
172 172
173 173 'conflict', base_lines, a_lines, b_lines
174 174 Lines from base were changed to either a or b and conflict.
175 175 """
176 176 for t in self.merge_regions():
177 177 what = t[0]
178 178 if what == b'unchanged':
179 179 yield what, self.base[t[1] : t[2]]
180 180 elif what == b'a' or what == b'same':
181 181 yield what, self.a[t[1] : t[2]]
182 182 elif what == b'b':
183 183 yield what, self.b[t[1] : t[2]]
184 184 elif what == b'conflict':
185 185 yield (
186 186 what,
187 187 self.base[t[1] : t[2]],
188 188 self.a[t[3] : t[4]],
189 189 self.b[t[5] : t[6]],
190 190 )
191 191 else:
192 192 raise ValueError(what)
193 193
194 194 def merge_regions(self):
195 195 """Return sequences of matching and conflicting regions.
196 196
197 197 This returns tuples, where the first value says what kind we
198 198 have:
199 199
200 200 'unchanged', start, end
201 201 Take a region of base[start:end]
202 202
203 203 'same', astart, aend
204 204 b and a are different from base but give the same result
205 205
206 206 'a', start, end
207 207 Non-clashing insertion from a[start:end]
208 208
209 209 'conflict', zstart, zend, astart, aend, bstart, bend
210 210 Conflict between a and b, with z as common ancestor
211 211
212 212 Method is as follows:
213 213
214 214 The two sequences align only on regions which match the base
215 215 and both descendants. These are found by doing a two-way diff
216 216 of each one against the base, and then finding the
217 217 intersections between those regions. These "sync regions"
218 218 are by definition unchanged in both and easily dealt with.
219 219
220 220 The regions in between can be in any of three cases:
221 221 conflicted, or changed on only one side.
222 222 """
223 223
224 224 # section a[0:ia] has been disposed of, etc
225 225 iz = ia = ib = 0
226 226
227 227 for region in self.find_sync_regions():
228 228 zmatch, zend, amatch, aend, bmatch, bend = region
229 229 # print 'match base [%d:%d]' % (zmatch, zend)
230 230
231 231 matchlen = zend - zmatch
232 232 assert matchlen >= 0
233 233 assert matchlen == (aend - amatch)
234 234 assert matchlen == (bend - bmatch)
235 235
236 236 len_a = amatch - ia
237 237 len_b = bmatch - ib
238 238 len_base = zmatch - iz
239 239 assert len_a >= 0
240 240 assert len_b >= 0
241 241 assert len_base >= 0
242 242
243 243 # print 'unmatched a=%d, b=%d' % (len_a, len_b)
244 244
245 245 if len_a or len_b:
246 246 # try to avoid actually slicing the lists
247 247 equal_a = compare_range(
248 248 self.a, ia, amatch, self.base, iz, zmatch
249 249 )
250 250 equal_b = compare_range(
251 251 self.b, ib, bmatch, self.base, iz, zmatch
252 252 )
253 253 same = compare_range(self.a, ia, amatch, self.b, ib, bmatch)
254 254
255 255 if same:
256 256 yield b'same', ia, amatch
257 257 elif equal_a and not equal_b:
258 258 yield b'b', ib, bmatch
259 259 elif equal_b and not equal_a:
260 260 yield b'a', ia, amatch
261 261 elif not equal_a and not equal_b:
262 262 yield b'conflict', iz, zmatch, ia, amatch, ib, bmatch
263 263 else:
264 264 raise AssertionError(b"can't handle a=b=base but unmatched")
265 265
266 266 ia = amatch
267 267 ib = bmatch
268 268 iz = zmatch
269 269
270 270 # if the same part of the base was deleted on both sides
271 271 # that's OK, we can just skip it.
272 272
273 273 if matchlen > 0:
274 274 assert ia == amatch
275 275 assert ib == bmatch
276 276 assert iz == zmatch
277 277
278 278 yield b'unchanged', zmatch, zend
279 279 iz = zend
280 280 ia = aend
281 281 ib = bend
282 282
283 283 def minimize(self, merge_regions):
284 284 """Trim conflict regions of lines where A and B sides match.
285 285
286 286 Lines where both A and B have made the same changes at the beginning
287 287 or the end of each merge region are eliminated from the conflict
288 288 region and are instead considered the same.
289 289 """
290 290 for region in merge_regions:
291 291 if region[0] != b"conflict":
292 292 yield region
293 293 continue
294 294 # pytype thinks this tuple contains only 3 things, but
295 295 # that's clearly not true because this code successfully
296 296 # executes. It might be wise to rework merge_regions to be
297 297 # some kind of attrs type.
298 298 (
299 299 issue,
300 300 z1,
301 301 z2,
302 302 a1,
303 303 a2,
304 304 b1,
305 305 b2,
306 306 ) = region # pytype: disable=bad-unpacking
307 307 alen = a2 - a1
308 308 blen = b2 - b1
309 309
310 310 # find matches at the front
311 311 ii = 0
312 312 while (
313 313 ii < alen and ii < blen and self.a[a1 + ii] == self.b[b1 + ii]
314 314 ):
315 315 ii += 1
316 316 startmatches = ii
317 317
318 318 # find matches at the end
319 319 ii = 0
320 320 while (
321 321 ii < alen
322 322 and ii < blen
323 323 and self.a[a2 - ii - 1] == self.b[b2 - ii - 1]
324 324 ):
325 325 ii += 1
326 326 endmatches = ii
327 327
328 328 if startmatches > 0:
329 329 yield b'same', a1, a1 + startmatches
330 330
331 331 yield (
332 332 b'conflict',
333 333 z1,
334 334 z2,
335 335 a1 + startmatches,
336 336 a2 - endmatches,
337 337 b1 + startmatches,
338 338 b2 - endmatches,
339 339 )
340 340
341 341 if endmatches > 0:
342 342 yield b'same', a2 - endmatches, a2
343 343
344 344 def find_sync_regions(self):
345 345 """Return a list of sync regions, where both descendants match the base.
346 346
347 347 Generates a list of (base1, base2, a1, a2, b1, b2). There is
348 348 always a zero-length sync region at the end of all the files.
349 349 """
350 350
351 351 ia = ib = 0
352 352 amatches = mdiff.get_matching_blocks(self.basetext, self.atext)
353 353 bmatches = mdiff.get_matching_blocks(self.basetext, self.btext)
354 354 len_a = len(amatches)
355 355 len_b = len(bmatches)
356 356
357 357 sl = []
358 358
359 359 while ia < len_a and ib < len_b:
360 360 abase, amatch, alen = amatches[ia]
361 361 bbase, bmatch, blen = bmatches[ib]
362 362
363 363 # there is an unconflicted block at i; how long does it
364 364 # extend? until whichever one ends earlier.
365 365 i = intersect((abase, abase + alen), (bbase, bbase + blen))
366 366 if i:
367 367 intbase = i[0]
368 368 intend = i[1]
369 369 intlen = intend - intbase
370 370
371 371 # found a match of base[i[0], i[1]]; this may be less than
372 372 # the region that matches in either one
373 373 assert intlen <= alen
374 374 assert intlen <= blen
375 375 assert abase <= intbase
376 376 assert bbase <= intbase
377 377
378 378 asub = amatch + (intbase - abase)
379 379 bsub = bmatch + (intbase - bbase)
380 380 aend = asub + intlen
381 381 bend = bsub + intlen
382 382
383 383 assert self.base[intbase:intend] == self.a[asub:aend], (
384 384 self.base[intbase:intend],
385 385 self.a[asub:aend],
386 386 )
387 387
388 388 assert self.base[intbase:intend] == self.b[bsub:bend]
389 389
390 390 sl.append((intbase, intend, asub, aend, bsub, bend))
391 391
392 392 # advance whichever one ends first in the base text
393 393 if (abase + alen) < (bbase + blen):
394 394 ia += 1
395 395 else:
396 396 ib += 1
397 397
398 398 intbase = len(self.base)
399 399 abase = len(self.a)
400 400 bbase = len(self.b)
401 401 sl.append((intbase, intbase, abase, abase, bbase, bbase))
402 402
403 403 return sl
404 404
405 405 def find_unconflicted(self):
406 406 """Return a list of ranges in base that are not conflicted."""
407 407 am = mdiff.get_matching_blocks(self.basetext, self.atext)
408 408 bm = mdiff.get_matching_blocks(self.basetext, self.btext)
409 409
410 410 unc = []
411 411
412 412 while am and bm:
413 413 # there is an unconflicted block at i; how long does it
414 414 # extend? until whichever one ends earlier.
415 415 a1 = am[0][0]
416 416 a2 = a1 + am[0][2]
417 417 b1 = bm[0][0]
418 418 b2 = b1 + bm[0][2]
419 419 i = intersect((a1, a2), (b1, b2))
420 420 if i:
421 421 unc.append(i)
422 422
423 423 if a2 < b2:
424 424 del am[0]
425 425 else:
426 426 del bm[0]
427 427
428 428 return unc
429 429
430 430
431 431 def _verifytext(text, path, ui, opts):
432 432 """verifies that text is non-binary (unless opts[text] is passed,
433 433 then we just warn)"""
434 434 if stringutil.binary(text):
435 435 msg = _(b"%s looks like a binary file.") % path
436 436 if not opts.get('quiet'):
437 437 ui.warn(_(b'warning: %s\n') % msg)
438 438 if not opts.get('text'):
439 439 raise error.Abort(msg)
440 440 return text
441 441
442 442
443 443 def _picklabels(defaults, overrides):
444 444 if len(overrides) > 3:
445 445 raise error.Abort(_(b"can only specify three labels."))
446 446 result = defaults[:]
447 447 for i, override in enumerate(overrides):
448 448 result[i] = override
449 449 return result
450 450
451 451
452 452 def is_not_null(ctx):
453 453 if not util.safehasattr(ctx, "node"):
454 454 return False
455 455 return ctx.node() != nodemod.nullid
456 456
457 457
458 458 def simplemerge(ui, localctx, basectx, otherctx, **opts):
459 459 """Performs the simplemerge algorithm.
460 460
461 461 The merged result is written into `localctx`.
462 462 """
463 463
464 464 def readctx(ctx):
465 465 # Merges were always run in the working copy before, which means
466 466 # they used decoded data, if the user defined any repository
467 467 # filters.
468 468 #
469 469 # Maintain that behavior today for BC, though perhaps in the future
470 470 # it'd be worth considering whether merging encoded data (what the
471 471 # repository usually sees) might be more useful.
472 472 return _verifytext(ctx.decodeddata(), ctx.path(), ui, opts)
473 473
474 474 mode = opts.get('mode', b'merge')
475 475 name_a, name_b, name_base = None, None, None
476 476 if mode != b'union':
477 477 name_a, name_b, name_base = _picklabels(
478 478 [localctx.path(), otherctx.path(), None], opts.get('label', [])
479 479 )
480 480
481 481 try:
482 482 localtext = readctx(localctx)
483 483 basetext = readctx(basectx)
484 484 othertext = readctx(otherctx)
485 485 except error.Abort:
486 486 return 1
487 487
488 488 m3 = Merge3Text(basetext, localtext, othertext)
489 489 extrakwargs = {
490 490 b"localorother": opts.get("localorother", None),
491 491 b'minimize': True,
492 492 }
493 493 if mode == b'union':
494 494 extrakwargs[b'start_marker'] = None
495 495 extrakwargs[b'mid_marker'] = None
496 496 extrakwargs[b'end_marker'] = None
497 497 elif name_base is not None:
498 498 extrakwargs[b'base_marker'] = b'|||||||'
499 499 extrakwargs[b'name_base'] = name_base
500 500 extrakwargs[b'minimize'] = False
501 501
502 mergedtext = b""
502 lines = []
503 503 for line in m3.merge_lines(
504 504 name_a=name_a, name_b=name_b, **pycompat.strkwargs(extrakwargs)
505 505 ):
506 506 if opts.get('print'):
507 507 ui.fout.write(line)
508 508 else:
509 mergedtext += line
509 lines.append(line)
510 510
511 511 # merge flags if necessary
512 512 flags = localctx.flags()
513 513 localflags = set(pycompat.iterbytestr(flags))
514 514 otherflags = set(pycompat.iterbytestr(otherctx.flags()))
515 515 if is_not_null(basectx) and localflags != otherflags:
516 516 baseflags = set(pycompat.iterbytestr(basectx.flags()))
517 517 commonflags = localflags & otherflags
518 518 addedflags = (localflags ^ otherflags) - baseflags
519 519 flags = b''.join(sorted(commonflags | addedflags))
520 520
521 521 if not opts.get('print'):
522 mergedtext = b''.join(lines)
522 523 localctx.write(mergedtext, flags)
523 524
524 525 if m3.conflicts and not mode == b'union':
525 526 return 1
General Comments 0
You need to be logged in to leave comments. Login now