##// END OF EJS Templates
mdiff: compute newlines-splitted texts within _unidiff...
Denis Laxalde -
r31267:881ed6a4 default
parent child Browse files
Show More
@@ -1,424 +1,423 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 base85,
17 17 bdiff,
18 18 error,
19 19 mpatch,
20 20 util,
21 21 )
22 22
def splitnewlines(text):
    """Split ``text`` into lines, breaking only on the newline character.

    Like ``str.splitlines(True)``, except that the newline character is the
    only separator recognised; any other character (carriage return, form
    feed, ...) stays inside its line.

    Returns a list of strings whose concatenation equals ``text``. Every
    item ends with a newline, except possibly the last one when ``text``
    does not end with a newline. An empty ``text`` gives an empty list.
    """
    pieces = text.split('\n')
    # split() always returns at least one item; the last one is whatever
    # follows the final newline (empty when text ends with a newline or is
    # itself empty) and is the only piece without a terminator.
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
32 32
class diffopts(object):
    """Bag of options controlling how diffs are generated.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    ``index`` and ``showsimilarity`` are accepted as well (see
    ``defaults``); their meaning is not visible from this module.
    """

    # Every recognised option together with the value used when the caller
    # does not supply it (or supplies None).
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
    }

    def __init__(self, **opts):
        """Set one attribute per key of ``defaults``.

        A keyword that is missing or None falls back to its default;
        keywords that are not keys of ``defaults`` are ignored. ``context``
        is converted with ``int()``; a ValueError from that conversion is
        reported as ``error.Abort``.
        """
        for name, fallback in self.defaults.items():
            given = opts.get(name)
            setattr(self, name, fallback if given is None else given)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        """Return a new diffopts with the same values, updated by kwargs."""
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)

# Shared instance holding only default values.
defaultopts = diffopts()
82 82
def wsclean(opts, text, blank=True):
    """Return ``text`` normalised according to the whitespace options.

    With ``opts.ignorews`` the text goes through ``bdiff.fixws(text, 1)``,
    otherwise with ``opts.ignorewsamount`` through ``bdiff.fixws(text, 0)``.
    When ``blank`` is true and ``opts.ignoreblanklines`` is set, runs of
    newlines are then collapsed to a single newline and leading/trailing
    newlines are removed.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if not (blank and opts.ignoreblanklines):
        return text
    collapsed = re.sub('\n+', '\n', text)
    return collapsed.strip('\n')
91 91
def splitblock(base1, lines1, base2, lines2, opts):
    """Split two line ranges into matching blocks and blank-line blocks.

    The input lines match except for interwoven blank lines. This
    generator yields ``[a1, a2, b1, b2], btype`` items where the bounds are
    offsets into ``lines1``/``lines2`` shifted by ``base1``/``base2`` and
    ``btype`` is '=' for a run of non-blank lines on both sides or '~' for
    a run of blank lines (on either side).
    """
    # A line counts as blank when wsclean() reduces it to an empty string.
    nonblank1 = [bool(wsclean(opts, l)) for l in lines1]
    nonblank2 = [bool(wsclean(opts, l)) for l in lines2]
    end1, end2 = len(nonblank1), len(nonblank2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        cur1, cur2 = start1, start2
        if (cur1 < end1 and nonblank1[cur1]
            and cur2 < end2 and nonblank2[cur2]):
            # Consume the matching lines
            btype = '='
            while cur1 < end1 and nonblank1[cur1] and nonblank2[cur2]:
                cur1 += 1
                cur2 += 1
        else:
            # Consume the block of blank lines
            btype = '~'
            while cur1 < end1 and not nonblank1[cur1]:
                cur1 += 1
            while cur2 < end2 and not nonblank2[cur2]:
                cur2 += 1
        yield [base1 + start1, base1 + cur1,
               base2 + start2, base2 + cur2], btype
        start1, start2 = cur1, cur2
117 117
def blocksinrange(blocks, rangeb):
    """Keep the items of `blocks` that intersect line range `rangeb`.

    `blocks` items look like ``(a1, a2, b1, b2), stype`` and `rangeb` is a
    ``(lower, upper)`` pair expressed in terms of the ``(b1, b2)`` side.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is the list of items of `blocks` whose ``(b1, b2)``
      part is inside `rangeb`, i.e. for which
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the corresponding line range with respect to the
      ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort when either bound of `rangea` could not be
    determined or the resulting range is reversed.
    """
    lowb, highb = rangeb
    lowa = higha = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == '='
        if matching and b1 <= lowb and highb <= b2:
            # rangeb is within a single "=" hunk: translate both bounds by
            # the offset between the two sides of the hunk
            lowa = a1 + (lowb - b1)
            higha = a1 + (highb - b1)
        else:
            if b1 <= lowb < b2:
                # lower bound falls in this block; only a "=" block allows
                # an exact translation, otherwise take the block start
                lowa = a2 - (b2 - lowb) if matching else a1
            if b1 < highb <= b2:
                # same for the upper bound, falling back to the block end
                higha = a1 + (highb - b1) if matching else a2
        if b1 < highb and lowb < b2:
            kept.append(block)
    if lowa is None or higha is None or higha < lowa:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowa, higha)
156 156
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available; otherwise they are computed on demand.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    # Pretend an empty match sits before the first real one, so that the
    # first match is handled like any other: it either starts at line 0
    # (the gap before it is empty and skipped) or later in the file.
    previous = [0, 0, 0, 0]
    for matched in bdiff.blocks(text1, text2):
        # the gap runs from the end of the previous match to the start of
        # this one, on both sides
        gap = [previous[1], matched[0], previous[3], matched[2]]
        previous = matched

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield matched, '='
197 197
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return a unified diff between texts ``a`` and ``b`` as one string.

    ``ad``/``bd`` are the dates and ``fn1``/``fn2`` the file names used in
    the '---'/'+++' header lines. An empty string is returned when both
    texts are empty or when there is nothing to report. When one side is
    empty the whole other side is emitted as a single hunk, with
    '/dev/null' as the name of a side that is None. Unless ``opts.text``
    is set, content detected as binary by util.binary() only produces a
    'Binary file ... has changed' line.
    """
    def datetag(date, fn=None):
        # the date is only shown for non-git diffs with dates enabled
        if not (opts.git or opts.nodates):
            return '\t%s\n' % date
        # otherwise a lone tab follows a file name containing a space
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not (a or b):
        return ""

    aprefix, bprefix = ('', '') if opts.noprefix else ('a/', 'b/')

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        lines = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # nothing on the old side: every line of b is an addition
        added = splitnewlines(b)
        if a is None:
            fromline = '--- /dev/null%s' % datetag(epoch)
        else:
            fromline = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        toline = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        rangeline = "@@ -0,0 +1,%d @@\n" % len(added)
        lines = [fromline, toline, rangeline]
        lines.extend("+" + e for e in added)
    elif not b:
        # nothing on the new side: every line of a is a removal
        removed = splitnewlines(a)
        fromline = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            toline = '+++ /dev/null%s' % datetag(epoch)
        else:
            toline = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        rangeline = "@@ -1,%d +0,0 @@\n" % len(removed)
        lines = [fromline, toline, rangeline]
        lines.extend("-" + e for e in removed)
    else:
        lines = list(_unidiff(a, b, opts=opts))
        if not lines:
            return ""

        # put the two header lines in front of the hunks
        lines[0:0] = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    # any line lacking its terminator gets the usual marker appended
    for pos, line in enumerate(lines):
        if line[-1] != '\n':
            lines[pos] = line + "\n\ No newline at end of file\n"

    return "".join(lines)
257 255
# creates a headerless unified diff
# t1 and t2 are the text to be diffed
def _unidiff(t1, t2, opts=defaultopts):
    """Yield the lines of a unified diff of ``t1`` and ``t2``.

    Only hunks are produced ('@@' range lines followed by ' ', '-' and '+'
    prefixed lines); no '---'/'+++' file header is emitted. Nothing is
    yielded when allblocks() reports no '!' block.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(line, total):
        # last context line after `line`, clamped to the number of lines
        return min(line + opts.context, total)

    def contextstart(line):
        # first context line before `line`, clamped to the start of file
        return max(line - opts.context, 0)

    # [position where the next function-line search stops, last function
    # line found]; a list so that yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for pos in xrange(astart - 1, lastpos - 1, -1):
                candidate = l1[pos]
                if candidate[0].isalnum():
                    func = ' ' + candidate.rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                           bstart, blen, func)
        for entry in delta:
            yield entry
        # trailing context, taken from the old text
        for trailing in l1[a2:aend]:
            yield ' ' + trailing

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype == '~':
            # The diff context lines are based on t1 content. When
            # blank lines are ignored, the new lines offsets must
            # be adjusted as if equivalent blocks ('~') had the
            # same sizes on both sides.
            ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        if stype != '!':
            continue

        # slice the new side before its offsets are shifted below
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)

        if hunk and astart < hunk[1] + opts.context + 1:
            # this change falls inside the context of the previous hunk:
            # join them, resuming the context where the previous hunk
            # ended and recording the new ending points.
            astart = hunk[1]
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            if hunk:
                for x in yieldhunk(hunk):
                    yield x
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend(' ' + x for x in l1[astart:a1])
        delta.extend('-' + x for x in old)
        delta.extend('+' + x for x in new)

    if hunk:
        for x in yieldhunk(hunk):
            yield x
360 359
def b85diff(to, tn):
    """Return a 'GIT binary patch' section holding the new content ``tn``.

    None is treated as empty content for either side. An empty string is
    returned when old and new contents are equal. Otherwise ``tn`` is
    zlib-compressed and emitted as a 'literal' block of base85-encoded
    lines (no delta against ``to`` is computed).
    """
    def fmtline(line):
        # The first character encodes the line length: 'A' onwards for
        # lengths 1 to 26, 'a' onwards for lengths above 26.
        size = len(line)
        if size <= 26:
            code = ord('A') + size - 1
        else:
            code = size - 26 + ord('a') - 1
        return '%c%s\n' % (chr(code), base85.b85encode(line, True))

    def chunk(text, csize=52):
        # successive slices of at most csize characters
        for start in xrange(0, len(text), csize):
            yield text[start:start + csize]

    to = '' if to is None else to
    tn = '' if tn is None else tn

    if to == tn:
        return ''

    # TODO: deltas
    ret = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
    ret.extend(fmtline(l) for l in chunk(zlib.compress(tn)))
    ret.append('\n')

    return ''.join(ret)
395 394
def patchtext(bin):
    """Return the concatenated data of every fragment in patch ``bin``.

    ``bin`` is read as a sequence of fragments, each made of a 12-byte
    header (three big-endian signed 32-bit integers) followed by as many
    bytes of data as the third integer says. The first two integers are
    not used here.
    """
    total = len(bin)
    fragments = []
    offset = 0
    while offset < total:
        dataat = offset + 12
        _p1, _p2, size = struct.unpack(">lll", bin[offset:dataat])
        offset = dataat + size
        fragments.append(bin[dataat:offset])
    return "".join(fragments)
405 404
def patch(a, bin):
    """Return the result of applying binary patch ``bin`` to text ``a``.

    When ``a`` is empty, the patch is not applied through mpatch: the
    content of ``bin`` past its first 12 bytes is returned as a
    util.buffer instead.
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
411 410
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff's matching blocks as (start_a, start_b, length) tuples.

    Each ``(a1, a2, b1, b2)`` entry from bdiff.blocks() becomes
    ``(a1, b1, a2 - a1)``.
    """
    matches = []
    for a1, a2, b1, _b2 in bdiff.blocks(a, b):
        matches.append((a1, b1, a2 - a1))
    return matches
415 414
def trivialdiffheader(length):
    """Return the 12-byte header ``(0, 0, length)``, or '' if length is 0.

    The header is three big-endian signed 32-bit integers.
    """
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
418 417
def replacediffheader(oldlen, newlen):
    """Return the 12-byte header ``(0, oldlen, newlen)``.

    The header is three big-endian signed 32-bit integers.
    """
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
421 420
# Module-level aliases: expose the mpatch and bdiff implementations under
# this module's namespace.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now