##// END OF EJS Templates
mdiff: move re-exports to top...
Yuya Nishihara -
r32199:2d84947c default
parent child Browse files
Show More
@@ -1,484 +1,484 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 base85,
17 17 bdiff,
18 18 error,
19 19 mpatch,
20 20 pycompat,
21 21 util,
22 22 )
23 23
# Module-level aliases: expose the mpatch and bdiff entry points under this
# module's namespace.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
27
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping the line terminators.

    Unlike str.splitlines, no other character is treated as a line break.
    A final fragment that is not newline-terminated is kept as-is; an empty
    final fragment (text ending in a newline, or empty text) is dropped.
    '''
    pieces = text.split('\n')
    # str.split always returns at least one element, so pop() is safe
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
33 37
class diffopts(object):
    '''Bag of options controlling diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    Every key of ``defaults`` becomes an instance attribute; an option that
    is not passed (or is passed as None) takes its value from ``defaults``.
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            given = opts.get(name)
            setattr(self, name, fallback if given is None else given)

        # context may arrive as a string; normalize it to an int
        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the current values, overridden by
        any keyword arguments given.'''
        merged = dict((name, getattr(self, name)) for name in self.defaults)
        merged.update(kwargs)
        return diffopts(**merged)

# shared instance carrying the default value of every option
defaultopts = diffopts()
84 88
def wsclean(opts, text, blank=True):
    '''Normalize text according to the whitespace-related diff options.

    With opts.ignorews the text goes through bdiff.fixws(text, 1); otherwise
    with opts.ignorewsamount it goes through bdiff.fixws(text, 0).  When
    `blank` is true and opts.ignoreblanklines is set, runs of newlines are
    then collapsed to one and leading/trailing newlines are stripped.
    '''
    if opts.ignorews or opts.ignorewsamount:
        # ignorews takes precedence over ignorewsamount
        text = bdiff.fixws(text, 1 if opts.ignorews else 0)
    if not (blank and opts.ignoreblanklines):
        return text
    return re.sub('\n+', '\n', text).strip('\n')
93 97
def splitblock(base1, lines1, base2, lines2, opts):
    '''Yield ([a1, a2, b1, b2], btype) sub-blocks of two line sequences.

    The input lines match except for interwoven blank lines.  They are
    transformed into a sequence of matching blocks (btype '=') and blank
    blocks (btype '~').  Yielded indexes are offset by base1 and base2.
    '''
    # True for every line that still has content once cleaned by wsclean()
    solid1 = [bool(wsclean(opts, l)) for l in lines1]
    solid2 = [bool(wsclean(opts, l)) for l in lines2]
    end1, end2 = len(solid1), len(solid2)
    pos1 = pos2 = 0
    while pos1 < end1 or pos2 < end2:
        nxt1, nxt2 = pos1, pos2
        bothsolid = (nxt1 < end1 and solid1[nxt1]
                     and nxt2 < end2 and solid2[nxt2])
        if bothsolid:
            # Consume the matching lines
            btype = '='
            while nxt1 < end1 and solid1[nxt1] and solid2[nxt2]:
                nxt1 += 1
                nxt2 += 1
        else:
            # Consume the block of blank lines
            btype = '~'
            while nxt1 < end1 and not solid1[nxt1]:
                nxt1 += 1
            while nxt2 < end2 and not solid2[nxt2]:
                nxt2 += 1
        yield [base1 + pos1, base1 + nxt1, base2 + pos2, base2 + nxt2], btype
        pos1, pos2 = nxt1, nxt2
119 123
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    The hunk covers [start, start + length); it is "in" the range when that
    interval overlaps (lowerbound, upperbound) with strict comparisons on
    both sides.

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    hunkstart, hunklen = hunk
    lower, upper = linerange
    hunkend = hunkstart + hunklen
    if not lower < hunkend:
        # the range begins at or after the end of the hunk
        return False
    return hunkstart < upper
144 148
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort if either bound of `rangea` could not be determined
    from `blocks`, or if the resulting upper bound is below the lower bound.
    """
    lbb, ubb = rangeb
    # bounds of the range on the "a" side; filled in while scanning blocks
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == '=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            # the block contains the lower bound of rangeb
            if b1 <= lbb < b2:
                if stype == '=':
                    # same offset from the end of the block on both sides
                    lba = a2 - (b2 - lbb)
                else:
                    # non-matching block: take the whole "a" side of it
                    lba = a1
            # the block contains the upper bound of rangeb
            if b1 < ubb <= b2:
                if stype == '=':
                    # same offset from the start of the block on both sides
                    uba = a1 + (ubb - b1)
                else:
                    # non-matching block: take the whole "a" side of it
                    uba = a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return filteredblocks, (lba, uba)
183 187
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file. If it starts later, the gap computed from this all-zero
    # pseudo-match covers the lines before it.
    prev = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        # region between the end of the previous match and the start of
        # the current one
        gap = [prev[1], match[0], prev[3], match[2]]
        prev = match

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield match, '='
224 228
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    `a` and `b` are the old and new contents, `ad` and `bd` the dates shown
    in the header lines, `fn1` and `fn2` the old and new file names.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    For content detected as binary (unless opts.text is set), `headers` is
    empty and `hunks` holds a single (None, ['Binary file ... has changed'])
    entry.  When one side is empty, `hunks` is a one-element tuple adding or
    removing every line of the other side.
    """
    def datetag(date, fn=None):
        # dates are only emitted for non-git diffs with dates enabled;
        # otherwise a bare tab is emitted after a name containing a space
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    # value returned whenever there is nothing to report
    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    def checknonewline(lines):
        # append the "no newline" marker to any line lacking a trailing '\n'
        for text in lines:
            if text[-1:] != '\n':
                # the backslash is escaped explicitly; the resulting string
                # is "\n\ No newline at end of file\n"
                text += "\n\\ No newline at end of file\n"
            yield text

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = (None, ['Binary file %s has changed\n' % fn1]),
    elif not a:
        # old side is empty or missing: every line of b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        hunks = (hunkrange, checknonewline(hunklines)),
    elif not b:
        # new side is empty or missing: every line of a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        hunks = (hunkrange, checknonewline(hunklines)),
    else:
        diffhunks = _unidiff(a, b, opts=opts)
        # pull the first hunk eagerly to find out whether there is any
        # difference at all
        try:
            hunkrange, hunklines = next(diffhunks)
        except StopIteration:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]
        def rewindhunks():
            # re-emit the hunk consumed above, then the remaining ones
            yield hunkrange, checknonewline(hunklines)
            for hr, hl in diffhunks:
                yield hr, checknonewline(hl)

        hunks = rewindhunks()

    return headerlines, hunks
309 313
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The amount of context comes from opts.context; when opts.showfunc is
    set, the hunk header is followed by the closest preceding line of t1
    that starts with an alphanumeric character (truncated to 40 characters).
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, len):
        # end of the trailing context, clamped to `len`
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # start of the leading context, clamped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position to stop the backwards search at, last function line found];
    # a list so that yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        # hunk is [astart, a2, bstart, b2, delta]; delta holds the hunk lines
        # collected so far (everything but the header and trailing context)
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the trailing context (aend - a2 lines) counts on both sides
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        # only '!' blocks produce output
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        # slices are taken before b1/b2 are shifted by ignoredlines below
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # too far apart: the pending hunk is complete, emit it
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # append leading context, then removed lines, then added lines
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    # emit the last pending hunk, if any
    if hunk:
        for x in yieldhunk(hunk):
            yield x
420 424
def b85diff(to, tn):
    '''Return a base85-encoded "GIT binary patch" carrying tn in full.

    `to` and `tn` are the old and new contents; None is treated as empty.
    An empty string is returned when both sides are equal.  Otherwise the
    result is a 'literal' section: tn is zlib-compressed, cut into chunks of
    at most 52 bytes, and each chunk is written as a length character
    followed by its base85 encoding.
    '''
    def lengthcode(size):
        # sizes up to 26 map onto 'A'.., larger ones onto 'a'..
        if size <= 26:
            return chr(ord('A') + size - 1)
        return chr(size - 26 + ord('a') - 1)

    def fmtline(line):
        encoded = base85.b85encode(line, True)
        return '%c%s\n' % (lengthcode(len(line)), encoded)

    def chunk(text, csize=52):
        total = len(text)
        start = 0
        while start < total:
            stop = start + csize
            yield text[start:stop]
            start = stop

    to = '' if to is None else to
    tn = '' if tn is None else tn

    if to == tn:
        return ''

    # TODO: deltas
    header = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
    body = [fmtline(piece) for piece in chunk(zlib.compress(tn))]
    return ''.join(header + body + ['\n'])
455 459
def patchtext(bin):
    """Return the concatenated data payloads carried by binary delta `bin`.

    `bin` is read as a sequence of records, each made of a 12-byte header of
    three big-endian 32-bit integers followed by a payload whose size is the
    header's third integer.  The first two header fields are ignored here;
    only the payloads are collected, in order.

    Raises struct.error if a header is truncated.
    """
    pos = 0
    t = []
    while pos < len(bin):
        # read the header in place (no slice copy); only the payload length
        # is needed
        l = struct.unpack_from(">lll", bin, pos)[2]
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    # payloads are byte strings, so join them with a bytes separator
    return b"".join(t)
465 469
def patch(a, bin):
    """Apply the single delta `bin` to text `a` and return the result.

    When `a` is empty, mpatch is bypassed: the result is a buffer over
    `bin` starting right after its first 12 bytes.
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
471 475
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) as a list of (start in a, start in b,
    length) triples, the length being measured on the `a` side."""
    triples = []
    for blk in bdiff.blocks(a, b):
        astart, aend, bstart = blk[0], blk[1], blk[2]
        triples.append((astart, bstart, aend - astart))
    return triples
475 479
def trivialdiffheader(length):
    """Return the 12-byte header (0, 0, length) packed as three big-endian
    32-bit integers, or an empty string when `length` is zero."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
478 482
def replacediffheader(oldlen, newlen):
    """Return the 12-byte header (0, oldlen, newlen) packed as three
    big-endian 32-bit integers."""
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
481
482 patches = mpatch.patches
483 patchedsize = mpatch.patchedsize
484 textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now