##// END OF EJS Templates
mdiff: explicitly compute places for the newline marker...
Joerg Sonnenberger -
r35869:a9d07bd8 default
parent child Browse files
Show More
@@ -1,495 +1,521
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 policy,
18 18 pycompat,
19 19 util,
20 20 )
21 21
# Marker line appended to a hunk after a final line that lacks a trailing
# newline.
_missing_newline_marker = "\\ No newline at end of file\n"

# Implementation modules are resolved through policy.importmod().
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Module-level aliases for the bdiff/mpatch entry points.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
30 32
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline; only the last one may
    lack it, when ``text`` itself does not end with a newline. Empty input
    yields an empty list.
    '''
    lines = [l + '\n' for l in text.split('\n')]
    if lines:
        if lines[-1] == '\n':
            # text ended with a newline: split() produced a trailing empty
            # piece that is not a real line
            lines.pop()
        else:
            # the last line had no newline: undo the one added above
            lines[-1] = lines[-1][:-1]
    return lines
40 42
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace changes at the end of lines
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Every recognised option and its default value. 'index',
    # 'showsimilarity' and 'worddiff' are accepted here but not described
    # in the docstring above.
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        """Set one attribute per key of ``defaults``.

        A keyword that is missing or None falls back to the default.
        ``context`` is coerced to int; error.Abort is raised when int()
        rejects the value with ValueError.
        """
        opts = pycompat.byteskwargs(opts)
        for k in self.defaults:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        """Return a new diffopts with ``kwargs`` overriding current values."""
        opts = dict((k, getattr(self, k)) for k in self.defaults)
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)

defaultopts = diffopts()
94 96
def wsclean(opts, text, blank=True):
    """Return ``text`` normalised according to the whitespace options.

    * ``opts.ignorews`` / ``opts.ignorewsamount`` delegate to
      bdiff.fixws() with 1 / 0 as second argument (ignorews wins when
      both are set);
    * ``opts.ignoreblanklines`` (only when ``blank`` is true) collapses
      runs of newlines into one and strips leading/trailing newlines;
    * ``opts.ignorewseol`` removes spaces, tabs, CR and FF that directly
      precede a newline.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    if opts.ignorewseol:
        text = re.sub(br'[ \t\r\f]+\n', r'\n', text)
    return text
105 107
def splitblock(base1, lines1, base2, lines2, opts):
    """Split a '~' block into alternating matching and blank sub-blocks.

    The input lines match except for interwoven blank lines. They are
    transformed into a sequence of ``([a1, a2, b1, b2], btype)`` items
    where btype is '=' for matching lines and '~' for blank ones; the
    offsets are shifted by ``base1`` / ``base2``.
    """
    # Reduce each line to 1 (has content after wsclean) or 0 (blank).
    lines1 = [(1 if wsclean(opts, l) else 0) for l in lines1]
    lines2 = [(1 if wsclean(opts, l) else 0) for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, '='
        if (i1 >= e1 or lines1[i1] == 0
            or i2 >= e2 or lines2[i2] == 0):
            # Consume the block of blank lines
            btype = '~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            # NOTE(review): i2 is not bounds-checked here; this relies on
            # both sides holding the same number of non-blank lines.
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
131 133
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    # half-open interval overlap: [start, start + length) vs
    # [lowerbound, upperbound)
    return lowerbound < start + length and start < upperbound
156 158
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort when either bound of `rangea` could not be
    determined or the resulting range is inverted.
    """
    lbb, ubb = rangeb
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == '=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            # map each bound separately; inside a non-'=' block the bound
            # snaps to the corresponding edge of the "a" side
            if b1 <= lbb < b2:
                if stype == '=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                if stype == '=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return filteredblocks, (lba, uba)
195 197
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # the gap between the end of the previous match and the start of
        # this one
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
                new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
                if old == new:
                    btype = '~'
            yield s, btype
        yield s1, '='
236 238
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts, check_binary=True):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    `a` and `b` are the old and new texts (None selects the '/dev/null'
    header on that side), `ad` and `bd` their dates and `fn1` and `fn2`
    their file names.

    Setting `check_binary` to false will skip the binary check, i.e. when
    it has been done in advance. Files are expected to be text in this case.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and check_binary and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        # one-element tuple holding a single (hunkrange, hunklines) pair
        hunks = (None, ['Binary file %s has changed\n' % fn1]),
    elif not a:
        # everything in b is an addition
        without_newline = b[-1:] != '\n'
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        if without_newline:
            # terminate the last line so the marker starts on its own line
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = (hunkrange, hunklines),
    elif not b:
        # everything in a is a removal
        without_newline = a[-1:] != '\n'
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = (hunkrange, hunklines),
    else:
        diffhunks = _unidiff(a, b, opts=opts)
        # pull the first hunk eagerly so an empty diff can be reported as
        # the sentinel
        try:
            hunkrange, hunklines = next(diffhunks)
        except StopIteration:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]
        def rewindhunks():
            # re-emit the hunk consumed above, then the remaining ones
            yield hunkrange, hunklines
            for hr, hl in diffhunks:
                yield hr, hl

        hunks = rewindhunks()

    return headerlines, hunks
324 328
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    When a text does not end with a newline and its last line is part of a
    hunk, that line is newline-terminated and followed by the
    "No newline at end of file" marker line.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, limit):
        ret = l + opts.context
        if ret > limit:
            ret = limit
        return ret

    def contextstart(l):
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        # [-1:] rather than [-1] so that an empty text is simply treated as
        # "no trailing newline" instead of raising IndexError
        if t1[-1:] != '\n' and astart + alen == len(l1) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0] in ('-', ' '):
                    if hunklines[i][0] == ' ':
                        skip = True
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if not skip and t2[-1:] != '\n' and bstart + blen == len(l2) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0] == '+':
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    if hunk:
        for x in yieldhunk(hunk):
            yield x
435 461
def b85diff(to, tn):
    '''return a base85-encoded "GIT binary patch" literal for tn

    ``to`` and ``tn`` are the old and new contents; None is treated as
    empty. An empty string is returned when both contents are equal.
    '''
    def fmtline(line):
        # the first character of each line encodes the chunk length:
        # 'A'..'Z' for 1..26 and 'a'.. for 27 and above
        l = len(line)
        if l <= 26:
            l = chr(ord('A') + l - 1)
        else:
            l = chr(l - 26 + ord('a') - 1)
        return '%c%s\n' % (l, util.b85encode(line, True))

    def chunk(text, csize=52):
        l = len(text)
        i = 0
        while i < l:
            yield text[i:i + csize]
            i += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    ret = []
    ret.append('GIT binary patch\n')
    ret.append('literal %d\n' % len(tn))
    for l in chunk(zlib.compress(tn)):
        ret.append(fmtline(l))
    ret.append('\n')

    return ''.join(ret)
470 496
def patchtext(bin):
    """Return the concatenated payloads of all fragments in ``bin``.

    ``bin`` is read as a sequence of fragments, each a 12-byte header of
    three big-endian 32-bit integers followed by as many payload bytes as
    the third integer says; the first two integers are ignored here.
    """
    pos = 0
    t = []
    while pos < len(bin):
        l = struct.unpack(">lll", bin[pos:pos + 12])[2]
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    return "".join(t)
480 506
def patch(a, bin):
    """Apply the binary delta ``bin`` to the text ``a``.

    When ``a`` is empty the result is the content of ``bin`` past its
    12-byte header, returned as a util.buffer rather than a new string.
    """
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
486 512
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) entries reshaped as 3-tuples.

    Each (a1, a2, b1, b2) entry becomes (a1, b1, a2 - a1): both start
    offsets followed by the length of the match.
    """
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
490 516
def trivialdiffheader(length):
    """Return the 12-byte header (0, 0, length) packed as big-endian
    32-bit integers, or an empty string when ``length`` is zero."""
    return struct.pack(">lll", 0, 0, length) if length else ''
493 519
def replacediffheader(oldlen, newlen):
    """Return the 12-byte header (0, oldlen, newlen) packed as big-endian
    32-bit integers."""
    return struct.pack(">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now