##// END OF EJS Templates
mdiff: remove rewindhunk by yielding a bool first to indicate data...
Joerg Sonnenberger -
r35870:6a33e81e default
parent child Browse files
Show More
@@ -1,521 +1,522 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 policy,
18 18 pycompat,
19 19 util,
20 20 )
21 21
# Backend modules are obtained through policy.importmod(); the names further
# down are plain aliases onto attributes of those modules.
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Line added to a hunk when the last line of a file has no trailing newline.
_missing_newline_marker = "\\ No newline at end of file\n"

# Aliases re-exporting backend functions from this module's namespace.
blocks = bdiff.blocks
fixws = bdiff.fixws
textdiff = bdiff.bdiff
patches = mpatch.patches
patchedsize = mpatch.patchedsize
32 32
def splitnewlines(text):
    '''Split text into lines on "\\n" only, keeping the line terminators.

    Unlike str.splitlines, no other character (such as "\\r") ends a line.
    The last element has no trailing newline when text does not end with
    one; empty text gives an empty list.
    '''
    pieces = text.split('\n')
    # whatever follows the final newline; empty when text ends with one
    remainder = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if remainder:
        result.append(remainder)
    return result
42 42
class diffopts(object):
    '''Bundle of settings controlling how diffs are generated.

    context           is the number of context lines
    text              treats all files as text
    showfunc          enables diff -p output
    git               enables the git extended patch format
    nodates           removes dates from diff headers
    nobinary          ignores binary files
    noprefix          disables the 'a/' and 'b/' prefixes (ignored in plain
                      mode)
    ignorews          ignores all whitespace changes in the diff
    ignorewsamount    ignores changes in the amount of whitespace
    ignoreblanklines  ignores changes whose lines are all blank
    upgrade           generates git diffs to avoid data loss

    Every key of `defaults` (including those not described above) becomes
    an instance attribute; a keyword argument that is missing or None falls
    back to the value found in `defaults`.
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            given = opts.get(name)
            if given is None:
                given = fallback
            setattr(self, name, given)

        # context may arrive as text; normalize it to an int or abort
        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the current values, overridden by
        the given keyword arguments.'''
        current = {}
        for name in self.defaults:
            current[name] = getattr(self, name)
        current = pycompat.strkwargs(current)
        current.update(kwargs)
        return diffopts(**current)

# shared instance carrying only the default values
defaultopts = diffopts()
96 96
def wsclean(opts, text, blank=True):
    """Return `text` normalized according to the whitespace options in `opts`.

    * `opts.ignorews` runs bdiff.fixws(text, 1); otherwise
      `opts.ignorewsamount` runs bdiff.fixws(text, 0);
    * `opts.ignoreblanklines` (only honoured when `blank` is true) collapses
      runs of newlines into one and strips leading/trailing newlines;
    * `opts.ignorewseol` removes spaces, tabs, carriage returns and form
      feeds that directly precede a newline.

    When none of the options is set, `text` is returned unchanged.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    if opts.ignorewseol:
        # pattern and replacement must be of the same string type: a bytes
        # pattern with a native-str replacement raises TypeError on Python 3
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
107 107
def splitblock(base1, lines1, base2, lines2, opts):
    """Yield (block, type) pairs splitting two line ranges at blank lines.

    The two inputs are expected to match except for interwoven blank
    lines. Each yielded block is a list [a1, a2, b1, b2] offset by `base1`
    and `base2`; type is '=' for a run of matching lines and '~' for a run
    of blank lines.
    """
    # 1 flags a line that still has content after wsclean(), 0 a blank one
    marks1 = [1 if wsclean(opts, line) else 0 for line in lines1]
    marks2 = [1 if wsclean(opts, line) else 0 for line in lines2]
    end1 = len(marks1)
    end2 = len(marks2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        pos1, pos2 = start1, start2
        atblank = (pos1 >= end1 or marks1[pos1] == 0
                   or pos2 >= end2 or marks2[pos2] == 0)
        if atblank:
            # swallow the blank lines found on either side
            kind = '~'
            while pos1 < end1 and marks1[pos1] == 0:
                pos1 += 1
            while pos2 < end2 and marks2[pos2] == 0:
                pos2 += 1
        else:
            # advance both sides in lockstep over the matching lines
            kind = '='
            while pos1 < end1 and marks1[pos1] == 1 and marks2[pos2] == 1:
                pos1 += 1
                pos2 += 1
        yield [base1 + start1, base1 + pos1,
               base2 + start2, base2 + pos2], kind
        start1, start2 = pos1, pos2
133 133
def hunkinrange(hunk, linerange):
    """Tell whether `hunk`, given as (start, length), overlaps `linerange`,
    given as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    hunkstart, hunklen = hunk
    lower, upper = linerange
    hunkend = hunkstart + hunklen
    # both bounds are strict: merely touching an edge is not an overlap
    return hunkstart < upper and lower < hunkend
158 158
def blocksinrange(blocks, rangeb):
    """Keep the `blocks` overlapping line range `rangeb` on their "b" side.

    `blocks` is an iterable of ``((a1, a2, b1, b2), stype)`` items.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is the list of items of `blocks` whose ``(b1, b2)``
      part is inside `rangeb`, that is for which
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range corresponding to `rangeb` expressed with
      respect to the ``(a1, a2)`` parts of `blocks`.

    Abort when either bound of `rangea` cannot be determined or when the
    resulting range is inverted.
    """
    lowb, highb = rangeb
    lowa = higha = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == '='
        if matching and b1 <= lowb and highb <= b2:
            # rangeb lies entirely in one "=" block: simply shift it over
            # to the "a" side
            lowa = a1 + (lowb - b1)
            higha = a1 + (highb - b1)
        else:
            if b1 <= lowb < b2:
                lowa = (a2 - (b2 - lowb)) if matching else a1
            if b1 < highb <= b2:
                higha = (a1 + (highb - b1)) if matching else a2
        if hunkinrange((b1, b2 - b1), rangeb):
            kept.append(block)
    if lowa is None or higha is None or higha < lowa:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowa, higha)
197 197
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Generate (block, type) pairs describing how text1 relates to text2.

    block is an mdiff.blocks line entry. type is '=' for blocks matching
    exactly one another (bdiff blocks), '!' for non-matching blocks and
    '~' for blocks matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines(); pass
    them when they are already available, otherwise they are computed on
    demand.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    matches = bdiff.blocks(text1, text2)
    # Pretend an empty match sits at the very start of both texts: the first
    # real match either begins at line 0 (the gap before it is then empty and
    # skipped) or later in the file.
    previous = [0, 0, 0, 0]
    for match in matches:
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                before = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                after = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if before == after:
                    kind = '~'
            yield gap, kind
        yield match, '='
238 238
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts, check_binary=True):
    """Return a unified diff as a (headers, hunks) tuple.

    `a` and `b` are the old and new contents, `ad` and `bd` the dates shown
    in the headers (omitted when `opts.git` or `opts.nodates` is set) and
    `fn1` and `fn2` the old and new file names. A side that is None is
    reported as /dev/null.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is an iterable yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    For binary content, `headers` is empty and the single hunk has a None
    range and a "Binary file ... has changed" line.

    Setting `check_binary` to false will skip the binary check, i.e. when
    it has been done in advance. Files are expected to be text in this case.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        # without a date, a lone tab still terminates names holding spaces
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and check_binary and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = (None, ['Binary file %s has changed\n' % fn1]),
    elif not a:
        # everything in b is an addition
        # slice rather than index: indexing bytes gives an int on Python 3
        without_newline = b[-1:] != '\n'
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = (hunkrange, hunklines),
    elif not b:
        # everything in a is a removal
        without_newline = a[-1:] != '\n'
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = (hunkrange, hunklines),
    else:
        hunks = _unidiff(a, b, opts=opts)
        # _unidiff() yields a bool before anything else; False means that
        # no hunk follows, i.e. there is nothing to report
        if not next(hunks):
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, hunks
328 320
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: the very first item yielded is True
    when at least one hunk follows and False when there is none, so callers
    can detect an empty diff without buffering a hunk.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, limit):
        # last context line after l, clamped to limit
        ret = l + opts.context
        if ret > limit:
            ret = limit
        return ret

    def contextstart(l):
        # first context line before l, clamped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the next function-line search stops, last function
    # line found]; a list so that yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        #
        # Slices are used instead of integer indexes so that the checks
        # neither fail on empty text nor compare an int against a string
        # when the texts are bytes on Python 3.
        skip = False
        if t1[-1:] != '\n' and astart + alen == len(l1) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0:1] in ('-', ' '):
                    if hunklines[i][0:1] == ' ':
                        skip = True
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if not skip and t2[-1:] != '\n' and bstart + blen == len(l2) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0:1] == '+':
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    # becomes True once the leading bool has been yielded
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    if hunk:
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        # nothing differs: the bool is the only item ever yielded
        yield False
461 462
def b85diff(to, tn):
    '''Return a "GIT binary patch" literal section describing tn.

    to and tn are the old and new contents; None stands for empty content.
    An empty string is returned when both contents are equal. Otherwise the
    result holds the length of tn followed by its zlib-compressed data,
    base85-encoded in lines of at most 52 input bytes.
    '''
    def fmtline(line):
        # Each line starts with one character encoding the number of bytes
        # it carries: 'A' onwards for 1 to 26, 'a' onwards from 27 up.
        size = len(line)
        if size <= 26:
            prefix = chr(ord('A') + size - 1)
        else:
            prefix = chr(size - 26 + ord('a') - 1)
        return '%c%s\n' % (prefix, util.b85encode(line, True))

    old = '' if to is None else to
    new = '' if tn is None else tn
    if old == new:
        return ''

    # TODO: deltas
    compressed = zlib.compress(new)
    pieces = ['GIT binary patch\n', 'literal %d\n' % len(new)]
    pos = 0
    while pos < len(compressed):
        pieces.append(fmtline(compressed[pos:pos + 52]))
        pos += 52
    pieces.append('\n')

    return ''.join(pieces)
496 497
def patchtext(bin):
    """Return the concatenation of the data carried by every fragment of
    the binary delta `bin`.

    Each fragment is a 12-byte header of three big-endian 32-bit integers,
    the last of which is the length of the data that follows it.
    """
    headersize = 12
    total = len(bin)
    fragments = []
    offset = 0
    while offset < total:
        datastart = offset + headersize
        length = struct.unpack(">lll", bin[offset:datastart])[2]
        fragments.append(bin[datastart:datastart + length])
        offset = datastart + length
    return "".join(fragments)
506 507
def patch(a, bin):
    """Apply the binary delta `bin` to the text `a`.

    With an empty `a`, the result is whatever follows the first 12 bytes of
    `bin`, returned as a util.buffer without going through mpatch.
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
512 513
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff's matching blocks as (start in a, start in b, length)
    triples."""
    triples = []
    for a1, a2, b1, _b2 in bdiff.blocks(a, b):
        triples.append((a1, b1, a2 - a1))
    return triples
516 517
def trivialdiffheader(length):
    """Return the 12-byte header (0, 0, length) packed as three big-endian
    32-bit integers, or an empty string when `length` is zero."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
519 520
def replacediffheader(oldlen, newlen):
    """Return the 12-byte header (0, oldlen, newlen) packed as three
    big-endian 32-bit integers."""
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
General Comments 0
You need to be logged in to leave comments. Login now