##// END OF EJS Templates
mdiff: let _unidiff yield hunks as (<range information>, <hunk lines>)...
Denis Laxalde -
r31269:5e7fd3a0 default
parent child Browse files
Show More
@@ -1,422 +1,430 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 base85,
17 17 bdiff,
18 18 error,
19 19 mpatch,
20 20 util,
21 21 )
22 22
def splitnewlines(text):
    '''split text into lines on newline characters only.

    Unlike str.splitlines, no other line boundary is recognised. Every
    returned line keeps its trailing newline; a final line that is not
    newline-terminated is returned as is. Empty text gives an empty list.
    '''
    pieces = text.split('\n')
    # whatever follows the last newline: empty when text ends with one
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
32 32
class diffopts(object):
    '''options controlling how diffs are generated

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    'index' and 'showsimilarity' are accepted as well (see `defaults`).
    '''

    # value used for each option that is missing or passed as None
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
    }

    def __init__(self, **opts):
        '''store every known option as an attribute

        Keyword arguments not listed in `defaults` are ignored; a value of
        None selects the default. `context` is converted with int() and
        error.Abort is raised when that conversion raises ValueError.
        '''
        for name, fallback in self.defaults.items():
            value = opts.get(name)
            setattr(self, name, fallback if value is None else value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''return a new diffopts with the same settings, updated by kwargs'''
        settings = {}
        for name in self.defaults:
            settings[name] = getattr(self, name)
        settings.update(kwargs)
        return diffopts(**settings)
80 80
# diffopts instance holding only the default values; it is the default `opts`
# of unidiff() and _unidiff() and the fallback used by allblocks()
defaultopts = diffopts()
82 82
def wsclean(opts, text, blank=True):
    """Return text normalised according to the whitespace options of opts.

    With opts.ignorews the text goes through bdiff.fixws(text, 1); otherwise,
    with opts.ignorewsamount, through bdiff.fixws(text, 0). When `blank` is
    true and opts.ignoreblanklines is set, runs of newlines are then
    collapsed to a single one and leading/trailing newlines are stripped.
    """
    if opts.ignorews or opts.ignorewsamount:
        # ignorews takes precedence when both options are set
        allws = 1 if opts.ignorews else 0
        text = bdiff.fixws(text, allws)
    if not (blank and opts.ignoreblanklines):
        return text
    return re.sub('\n+', '\n', text).strip('\n')
91 91
def splitblock(base1, lines1, base2, lines2, opts):
    """Split two line lists into matching ('=') and blank ('~') blocks.

    The two inputs are expected to be equal except for interwoven blank
    lines. Yields ``[a1, a2, b1, b2], btype`` items where the positions are
    offsets into lines1/lines2 shifted by base1/base2.
    """
    # True marks a line that still has content after wsclean()
    full1 = [bool(wsclean(opts, line)) for line in lines1]
    full2 = [bool(wsclean(opts, line)) for line in lines2]
    end1 = len(full1)
    end2 = len(full2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        pos1 = start1
        pos2 = start2
        atblank = (pos1 >= end1 or not full1[pos1]
                   or pos2 >= end2 or not full2[pos2])
        if atblank:
            # swallow the run of blank lines on each side
            btype = '~'
            while pos1 < end1 and not full1[pos1]:
                pos1 += 1
            while pos2 < end2 and not full2[pos2]:
                pos2 += 1
        else:
            # advance both sides together over the non-blank lines
            btype = '='
            while pos1 < end1 and full1[pos1] and full2[pos2]:
                pos1 += 1
                pos2 += 1
        yield [base1 + start1, base1 + pos1,
               base2 + start2, base2 + pos2], btype
        start1 = pos1
        start2 = pos2
117 117
def blocksinrange(blocks, rangeb):
    """Select the items of `blocks` overlapping line range `rangeb`.

    `blocks` yields ``(a1, a2, b1, b2), stype`` items and `rangeb` is a
    ``(lower, upper)`` line range expressed on the ``(b1, b2)`` side.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` lists the items of `blocks` whose ``(b1, b2)`` part
      satisfies ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the corresponding line range on the ``(a1, a2)`` side.

    Raise error.Abort when either bound of `rangea` cannot be determined or
    when the bounds come out inverted.
    """
    lowb, highb = rangeb
    lowa = higha = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == '='
        if matching and b1 <= lowb and highb <= b2:
            # rangeb lies inside a single "=" block: shift it to the a side
            lowa = a1 + (lowb - b1)
            higha = a1 + (highb - b1)
        else:
            if b1 <= lowb < b2:
                lowa = a2 - (b2 - lowb) if matching else a1
            if b1 < highb <= b2:
                higha = a1 + (highb - b1) if matching else a2
        if lowb < b2 and b1 < highb:
            kept.append(block)
    if lowa is None or higha is None or higha < lowa:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowa, higha)
156 156
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) tuples covering text1 and text2.

    Each block is a bdiff.blocks-style line entry. type is '=' for blocks
    matching exactly one another (the bdiff blocks themselves), '!' for the
    non-matching gaps between them and '~' for gaps that match once blank
    lines have been filtered out (only with opts.ignoreblanklines).

    lines1 and lines2 are text1 and text2 split with splitnewlines(), if
    the caller already has them; otherwise they are computed on demand.
    opts defaults to defaultopts.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # Pretend an empty match sits at the very start so that the first real
    # match is handled like any other one: if it begins at line 0 the gap
    # before it is empty and gets skipped below.
    previous = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and also drops the empty gap in front of a match at line 0
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                before = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                after = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if before == after:
                    kind = '~'
            yield gap, kind
        yield match, '='
197 197
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return the unified diff between texts a and b as a single string.

    a and b are the old and new contents; a value of None (as opposed to an
    empty text) makes the corresponding header line point at /dev/null.
    ad and bd are the dates shown in the '---' and '+++' header lines
    (omitted with opts.git or opts.nodates). fn1 and fn2 are the file names
    shown in the headers.

    An empty string is returned when both texts are empty or when no
    difference is found. Unless opts.text is set, contents detected as
    binary by util.binary() only produce a 'Binary file ... has changed'
    line.
    """
    def datetag(date, fn=None):
        # end of a header line: the date when dates are shown, otherwise a
        # bare tab if the file name contains a space, then the newline
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        lines = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        lines = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        lines = [l1, l2, l3] + ["-" + e for e in a]
    else:
        # flatten the hunks in a single pass; sum(lists, []) would copy the
        # accumulated list once per hunk
        lines = [line
                 for hrange, hlines in _unidiff(a, b, opts=opts)
                 for line in hlines]
        if not lines:
            return ""

        lines.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))
        lines.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

    # a line without a trailing newline can only come from the end of a file
    for ln, line in enumerate(lines):
        if line[-1] != '\n':
            lines[ln] = line + "\n\\ No newline at end of file\n"

    return "".join(lines)
255 255
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(pos, total):
        # last line of trailing context, clamped to the end of the file
        return min(pos + opts.context, total)

    def contextstart(pos):
        # first line of leading context, clamped to the start of the file
        return max(pos - opts.context, 0)

    # [line where the next function-line search stops, last function found]
    lastfunc = [0, '']

    def buildhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        header = "@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))
        trailing = [' ' + l1[x] for x in xrange(a2, aend)]
        return hunkrange, [header] + delta + trailing

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype == '~':
            # The diff context lines are based on t1 content. When
            # blank lines are ignored, the new lines offsets must
            # be adjusted as if equivalent blocks ('~') had the
            # same sizes on both sides.
            ignoredlines += (b2 - b1) - (a2 - a1)
        if stype != '!':
            continue

        old = l1[a1:a2]
        # slice before b1/b2 are shifted: l2 is indexed by real positions
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)

        if hunk and astart < hunk[1] + opts.context + 1:
            # this change falls inside the context of the previous hunk:
            # join them and record the new ending points.
            astart = hunk[1]
            bstart = hunk[3]
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            if hunk:
                yield buildhunk(hunk)
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend(' ' + x for x in l1[astart:a1])
        delta.extend('-' + x for x in old)
        delta.extend('+' + x for x in new)

    if hunk:
        yield buildhunk(hunk)
358 366
def b85diff(to, tn):
    '''return a base85-encoded git binary patch replacing to with tn

    None is treated as empty content for either side. An empty string is
    returned when both contents are equal. The patch always carries the
    whole zlib-compressed new content as a 'literal' section.
    '''
    def fmtline(line):
        # the first character encodes the chunk length: 'A'..'Z' for 1..26,
        # then 'a' onwards for 27 and more
        size = len(line)
        if size <= 26:
            code = ord('A') + size - 1
        else:
            code = ord('a') + size - 27
        return '%c%s\n' % (chr(code), base85.b85encode(line, True))

    to = '' if to is None else to
    tn = '' if tn is None else tn

    if to == tn:
        return ''

    # TODO: deltas
    csize = 52
    packed = zlib.compress(tn)
    out = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
    offset = 0
    while offset < len(packed):
        out.append(fmtline(packed[offset:offset + csize]))
        offset += csize
    out.append('\n')

    return ''.join(out)
393 401
def patchtext(bin):
    """Return the concatenated data of all fragments in binary patch `bin`.

    Each fragment starts with a 12-byte header of three big-endian 32-bit
    signed integers; only the third one, the length of the data that
    follows the header, is used here.
    """
    headersize = 12
    fragments = []
    total = len(bin)
    offset = 0
    while offset < total:
        datastart = offset + headersize
        length = struct.unpack(">lll", bin[offset:datastart])[2]
        offset = datastart + length
        fragments.append(bin[datastart:offset])
    return "".join(fragments)
403 411
def patch(a, bin):
    """Apply binary patch `bin` to text `a` and return the result.

    When `a` is empty, what follows the first 12 bytes of `bin` is returned
    through util.buffer(); otherwise the work is done by mpatch.patches().
    """
    if len(a):
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
409 417
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) as (start in a, start in b, size) tuples."""
    matches = []
    for block in bdiff.blocks(a, b):
        astart, aend, bstart = block[0], block[1], block[2]
        matches.append((astart, bstart, aend - astart))
    return matches
413 421
def trivialdiffheader(length):
    """Return the packed (0, 0, length) header, or '' when length is zero."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
416 424
def replacediffheader(oldlen, newlen):
    """Return the packed (0, oldlen, newlen) header.

    The three values are stored as big-endian 32-bit signed integers.
    """
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
419 427
# module-level aliases so that these mpatch and bdiff functions can be
# reached under the names patches, patchedsize and textdiff of this module
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now