##// END OF EJS Templates
mdiff: mark diffopts as having dynamic attributes...
Augie Fackler -
r43784:f2f460cd default
parent child Browse files
Show More
@@ -1,554 +1,557 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from .pycompat import (
16 16 getattr,
17 17 setattr,
18 18 )
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 policy,
23 23 pycompat,
24 24 util,
25 25 )
26 26 from .utils import dateutil
27 27
28 28 _missing_newline_marker = b"\\ No newline at end of file\n"
29 29
30 30 bdiff = policy.importmod(r'bdiff')
31 31 mpatch = policy.importmod(r'mpatch')
32 32
33 33 blocks = bdiff.blocks
34 34 fixws = bdiff.fixws
35 35 patches = mpatch.patches
36 36 patchedsize = mpatch.patchedsize
37 37 textdiff = bdiff.bdiff
38 38 splitnewlines = bdiff.splitnewlines
39 39
40 40
# TODO: this looks like it could be an attrs, which might help pytype
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Attributes are assigned dynamically in __init__ (one per key of
    # ``defaults``); this flag tells pytype not to complain about
    # attribute access it cannot statically see.
    _HAS_DYNAMIC_ATTRIBUTES = True

    # Default value for every supported option.  Keys are bytes because
    # incoming keyword options are normalized with pycompat.byteskwargs().
    defaults = {
        b'context': 3,
        b'text': False,
        b'showfunc': False,
        b'git': False,
        b'nodates': False,
        b'nobinary': False,
        b'noprefix': False,
        b'index': 0,
        b'ignorews': False,
        b'ignorewsamount': False,
        b'ignorewseol': False,
        b'ignoreblanklines': False,
        b'upgrade': False,
        b'showsimilarity': False,
        b'worddiff': False,
        b'xdiff': False,
    }

    def __init__(self, **opts):
        """Set every known option from ``opts``, falling back to
        ``defaults`` for missing or None values.

        Raises error.Abort if ``context`` cannot be coerced to an int.
        """
        opts = pycompat.byteskwargs(opts)
        for k in self.defaults:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            # context may arrive as a string from config/CLI parsing
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(
                _(b'diff context lines count must be an integer, not %r')
                % pycompat.bytestr(self.context)
            )

    def copy(self, **kwargs):
        """Return a new diffopts with ``kwargs`` overriding current values."""
        opts = {k: getattr(self, k) for k in self.defaults}
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)
95 98
96 99
97 100 defaultopts = diffopts()
98 101
99 102
def wsclean(opts, text, blank=True):
    """Normalize whitespace in ``text`` per the diff options.

    ``opts.ignorews`` strips all whitespace, ``opts.ignorewsamount``
    collapses runs of whitespace (both via bdiff.fixws);
    ``opts.ignoreblanklines`` (honored only when ``blank`` is True)
    squeezes and trims blank lines; ``opts.ignorewseol`` drops trailing
    whitespace before newlines.  Returns the cleaned bytes.
    """
    cleaned = text
    if opts.ignorews:
        cleaned = bdiff.fixws(cleaned, 1)
    elif opts.ignorewsamount:
        cleaned = bdiff.fixws(cleaned, 0)
    if blank and opts.ignoreblanklines:
        cleaned = re.sub(b'\n+', b'\n', cleaned).strip(b'\n')
    if opts.ignorewseol:
        cleaned = re.sub(br'[ \t\r\f]+\n', br'\n', cleaned)
    return cleaned
110 113
111 114
def splitblock(base1, lines1, base2, lines2, opts):
    """Yield ([a1, a2, b1, b2], btype) sub-blocks of a matching block.

    btype is b'=' for runs of mutually matching significant lines and
    b'~' for runs that are blank (after wsclean) on either side.
    ``base1``/``base2`` are the absolute starting offsets of the block in
    the original texts.
    """
    # The input lines matches except for interwoven blank lines. We
    # transform it into a sequence of matching blocks and blank blocks.
    # Collapse each line to 1 (significant) or 0 (blank after cleaning).
    lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
    lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, b'='
        if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
            # Consume the block of blank lines
            btype = b'~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        # Translate local indices back to absolute line offsets.
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
136 139
137 140
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lower, upper = linerange
    # The hunk covers [start, start + length); it intersects the
    # half-open range [lower, upper) iff neither interval ends at or
    # before the point where the other begins.
    return start < upper and lower < start + length
162 165
163 166
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

    Raises error.Abort when the bounds cannot be mapped (line range
    exceeds file size).
    """
    lbb, ubb = rangeb
    # lba/uba accumulate the corresponding bounds on the "a" side; they
    # stay None until some block covers them.
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == b'=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            # Map each bound independently when it falls inside this block.
            if b1 <= lbb < b2:
                if stype == b'=':
                    lba = a2 - (b2 - lbb)
                else:
                    # non-matching block: be conservative, take its start
                    lba = a1
            if b1 < ubb <= b2:
                if stype == b'=':
                    uba = a1 + (ubb - b1)
                else:
                    # non-matching block: be conservative, take its end
                    uba = a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_(b'line range exceeds file size'))
    return filteredblocks, (lba, uba)
202 205
203 206
def chooseblocksfunc(opts=None):
    """Return the matching-blocks function selected by ``opts``.

    xdiff blocks are used only when explicitly requested via
    ``opts.xdiff`` AND the compiled bdiff module actually provides them;
    otherwise fall back to the classic bdiff implementation.
    """
    use_xdiff = (
        opts is not None
        and opts.xdiff
        and util.safehasattr(bdiff, b'xdiffblocks')
    )
    if use_xdiff:
        return bdiff.xdiffblocks
    return bdiff.blocks
213 216
214 217
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    line1 and line2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        # Clean whitespace before diffing so the block boundaries ignore it.
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = chooseblocksfunc(opts)(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file. If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # s becomes the mismatching gap between the previous match and
        # the current one: [prev_a_end, cur_a_start, prev_b_end, cur_b_start]
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            type = b'!'
            if opts.ignoreblanklines:
                # Reclassify the gap as '~' when both sides are equal
                # once blank lines are dropped.
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
                new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
                if old == new:
                    type = b'~'
            yield s, type
        yield s1, b'='
255 258
256 259
def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    Set binary=True if either a or b should be taken as a binary file.
    """

    def datetag(date, fn=None):
        # Dates are suppressed in git mode or with nodates; a lone tab is
        # still emitted for filenames containing spaces so parsers can
        # find the end of the name.
        if not opts.git and not opts.nodates:
            return b'\t%s' % date
        if fn and b' ' in fn:
            return b'\t'
        return b''

    # Returned when there is nothing to show: empty headers, empty hunks.
    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    epoch = dateutil.datestr((0, 0))

    # Normalize path separators to '/' for the diff headers.
    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
    elif not a:
        # Pure addition: a is empty ('' or None).
        without_newline = not b.endswith(b'\n')
        b = splitnewlines(b)
        if a is None:
            l1 = b'--- /dev/null%s' % datetag(epoch)
        else:
            l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(_missing_newline_marker)
        hunks = ((hunkrange, hunklines),)
    elif not b:
        # Pure removal: b is empty ('' or None).
        without_newline = not a.endswith(b'\n')
        a = splitnewlines(a)
        l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = b'+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(_missing_newline_marker)
        hunks = ((hunkrange, hunklines),)
    else:
        # General case: both sides non-empty.  The first value yielded by
        # _unidiff() is a bool telling whether there are any hunks at all.
        hunks = _unidiff(a, b, opts=opts)
        if not next(hunks):
            return sentinel

        headerlines = [
            b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, hunks
338 341
339 342
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(l, len):
        # Clamp l + context to the file length.  NOTE: 'len' shadows the
        # builtin here (kept as-is for byte-identical code).
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # Clamp l - context to zero.
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # Mutable closure state for showfunc: [last scanned position, last
    # function line found].  A list so yieldhunk() can update it.
    lastfunc = [0, b'']

    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # b-side length mirrors a's trailing context (aend - a2).
        blen = b2 - bstart + aend - a2

        func = b""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in pycompat.xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = b' ' + l1[i].rstrip()
                    # split long function name if ASCII. otherwise we have no
                    # idea where the multi-byte boundary is, so just leave it.
                    if encoding.isasciistr(func):
                        func = func[:41]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [b' ' + l1[x] for x in pycompat.xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
            for i in pycompat.xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith((b'-', b' ')):
                    if hunklines[i].startswith(b' '):
                        skip = True
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
            for i in pycompat.xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith(b'+'):
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != b'!':
            if stype == b'~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # Previous hunk is complete: emit it (preceded once by
                # the has-hunks sentinel).
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # Leading context, then removals, then additions.
        delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
        delta[len(delta) :] = [b'-' + x for x in old]
        delta[len(delta) :] = [b'+' + x for x in new]

    if hunk:
        # Flush the final pending hunk.
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        yield False
487 490
488 491
def b85diff(to, tn):
    '''print base85-encoded binary diff'''

    def fmtline(line):
        # One output line: a single length character followed by the
        # base85 payload.  Lengths 1..26 map to 'A'..'Z', 27..52 to 'a'..'z'.
        linelen = len(line)
        if linelen <= 26:
            lenchar = pycompat.bytechr(ord(b'A') + linelen - 1)
        else:
            lenchar = pycompat.bytechr(linelen - 26 + ord(b'a') - 1)
        return b'%c%s\n' % (lenchar, util.b85encode(line, True))

    def chunk(text, csize=52):
        # Slice text into csize-byte pieces.
        for start in pycompat.xrange(0, len(text), csize):
            yield text[start : start + csize]

    if to is None:
        to = b''
    if tn is None:
        tn = b''

    if to == tn:
        return b''

    # TODO: deltas
    out = [b'GIT binary patch\n', b'literal %d\n' % len(tn)]
    for piece in chunk(zlib.compress(tn)):
        out.append(fmtline(piece))
    out.append(b'\n')

    return b''.join(out)
524 527
525 528
def patchtext(bin):
    """Return the concatenated replacement text stored in patch ``bin``.

    A binary patch is a sequence of records, each a 12-byte big-endian
    header (start, end, length) followed by ``length`` bytes of
    replacement data; the range fields are ignored here.
    """
    fragments = []
    offset = 0
    total = len(bin)
    while offset < total:
        # The replaced range (first two fields) is irrelevant for
        # extracting the literal text.
        _start, _end, datalen = struct.unpack(b">lll", bin[offset : offset + 12])
        offset += 12
        fragments.append(bin[offset : offset + datalen])
        offset += datalen
    return b"".join(fragments)
535 538
536 539
def patch(a, bin):
    """Apply binary patch ``bin`` to full text ``a`` and return the result."""
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
542 545
543 546
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return (start_a, start_b, length) triples for the matching blocks
    reported by bdiff.blocks()."""
    matches = []
    for a1, a2, b1, _b2 in bdiff.blocks(a, b):
        matches.append((a1, b1, a2 - a1))
    return matches
547 550
548 551
def trivialdiffheader(length):
    """Return the delta header for replacing an empty text with ``length``
    bytes, or b'' when there is nothing to add."""
    if not length:
        return b''
    # (start, end, length) with an empty replaced range at offset 0.
    return struct.pack(b">lll", 0, 0, length)
551 554
552 555
def replacediffheader(oldlen, newlen):
    """Return a delta header replacing the whole old text (``oldlen``
    bytes) with a new text of ``newlen`` bytes."""
    header = struct.pack(b">lll", 0, oldlen, newlen)
    return header
General Comments 0
You need to be logged in to leave comments. Login now