##// END OF EJS Templates
mdiff: extract a checknonewline inner function in unidiff()
Denis Laxalde -
r31272:e41946f3 default
parent child Browse files
Show More
@@ -1,441 +1,443 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 base85,
17 17 bdiff,
18 18 error,
19 19 mpatch,
20 20 util,
21 21 )
22 22
def splitnewlines(text):
    '''Split text on '\\n' only, keeping the newline on each piece.

    Unlike str.splitlines, no other line boundary is recognised. A final
    piece without a trailing newline is returned as is; empty text gives an
    empty list.
    '''
    pieces = text.split('\n')
    # whatever follows the last newline (possibly nothing at all)
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
32 32
class diffopts(object):
    '''Container for the options controlling diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    'index' and 'showsimilarity' are accepted too; their meaning is not
    described in this module.
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
    }

    def __init__(self, **opts):
        # Only keys listed in `defaults` become attributes; a missing or
        # None value falls back to the default.
        for name, fallback in self.defaults.items():
            value = opts.get(name)
            setattr(self, name, fallback if value is None else value)

        # context may arrive as a string; normalise it to an integer
        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the same values, overridden by kwargs.'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)

# shared instance carrying only the default values
defaultopts = diffopts()
82 82
def wsclean(opts, text, blank=True):
    """Return text normalised according to the whitespace options in opts.

    opts.ignorews takes precedence over opts.ignorewsamount; either one
    passes the text through bdiff.fixws. When `blank` is true and
    opts.ignoreblanklines is set, runs of newlines are collapsed to a single
    one and leading/trailing newlines are removed.
    """
    if opts.ignorews or opts.ignorewsamount:
        text = bdiff.fixws(text, 1 if opts.ignorews else 0)
    if not (blank and opts.ignoreblanklines):
        return text
    return re.sub('\n+', '\n', text).strip('\n')
91 91
def splitblock(base1, lines1, base2, lines2, opts):
    """Yield ([a1, a2, b1, b2], type) sub-blocks of two line lists.

    The input lines match except for interwoven blank lines. They are cut
    into alternating runs: '=' for runs of lines that still have content
    after wsclean() on both sides, '~' for runs of blank lines. Offsets in
    the yielded blocks are shifted by base1 and base2.
    """
    # 1 marks a line with content left after cleaning, 0 a blank one
    flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
    flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
    end1 = len(flags1)
    end2 = len(flags2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        pos1, pos2 = start1, start2
        onblank = (pos1 >= end1 or flags1[pos1] == 0
                   or pos2 >= end2 or flags2[pos2] == 0)
        if onblank:
            # Consume the block of blank lines
            kind = '~'
            while pos1 < end1 and flags1[pos1] == 0:
                pos1 += 1
            while pos2 < end2 and flags2[pos2] == 0:
                pos2 += 1
        else:
            # Consume the matching lines
            kind = '='
            while pos1 < end1 and flags1[pos1] == 1 and flags2[pos2] == 1:
                pos1 += 1
                pos2 += 1
        yield [base1 + start1, base1 + pos1,
               base2 + start2, base2 + pos2], kind
        start1, start2 = pos1, pos2
117 117
def blocksinrange(blocks, rangeb):
    """Keep the `blocks` overlapping line range `rangeb` on the "b" side.

    `blocks` is an iterable of ``((a1, a2, b1, b2), stype)`` items and
    `rangeb` a ``(lower, upper)`` pair expressed in "b" line numbers.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is the list of items of `blocks` for which
      ``rangeb[0] < b2 and b1 < rangeb[1]`` holds;
    * `rangea` is the corresponding line range with respect to the
      ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort when either bound of `rangea` could not be determined
    or when the resulting range is inverted.
    """
    lowerb, upperb = rangeb
    lowera = uppera = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == '='
        if matching and b1 <= lowerb and upperb <= b2:
            # rangeb is within a single "=" hunk: translate it to the "a"
            # side by applying the hunk's offset
            lowera = lowerb - b1 + a1
            uppera = upperb - b1 + a1
        else:
            if b1 <= lowerb < b2:
                lowera = a2 - (b2 - lowerb) if matching else a1
            if b1 < upperb <= b2:
                uppera = a1 + (upperb - b1) if matching else a2
        if b1 < upperb and lowerb < b2:
            kept.append(block)
    if lowera is None or uppera is None or uppera < lowera:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowera, uppera)
156 156
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) tuples, where block is a bdiff.blocks-style
    [a1, a2, b1, b2] entry.

    type is '=' for blocks matching exactly one another (bdiff blocks),
    '!' for non-matching blocks and '~' for blocks matching only after
    having filtered blank lines.

    lines1 and lines2 are text1 and text2 split with splitnewlines(), if
    the caller already has them; otherwise they are computed when needed.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    # Before the first match, pretend an empty match sits at line 0 so the
    # gap computation below also covers the start of both texts.
    previous = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        # region lying between the end of the previous match and the start
        # of this one
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and skips the empty gap found when the first match starts at 0
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield match, '='
197 197
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return a unified diff as a (headers, hunkstext) tuple.

    `a` and `b` are the old and new contents, `ad` and `bd` the dates put
    next to the file names `fn1` and `fn2` in the header lines. A content
    of None is reported as /dev/null on its side of the header.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunkstext` is a string
    containing diff hunks. Otherwise, both `headers` and `hunkstext` are
    empty. For content detected as binary (unless opts.text is set),
    `headers` is empty and `hunkstext` is a one-line notice.
    """
    def datetag(date, fn=None):
        # the date is only shown for non-git diffs with dates enabled;
        # otherwise a bare tab follows a file name containing a space
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ""
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    def checknonewline(lines):
        # append the "no newline" marker to any line lacking a final '\n'
        for text in lines:
            if text[-1] != '\n':
                # the backslash is escaped explicitly; the resulting string
                # is the same as with the former "\ " spelling
                text += "\n\\ No newline at end of file\n"
            yield text

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # nothing on the old side: every line of b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        l = ["@@ -0,0 +1,%d @@\n" % len(b)] + ["+" + e for e in b]
    elif not b:
        # nothing on the new side: every line of a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        l = ["@@ -1,%d +0,0 @@\n" % len(a)] + ["-" + e for e in a]
    else:
        # flatten the per-hunk line lists in one pass; sum(lists, [])
        # would copy the accumulated list again for every hunk
        l = [line
             for hrange, hlines in _unidiff(a, b, opts=opts)
             for line in hlines]
        if not l:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, "".join(checknonewline(l))
266 268
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, len):
        # last context line after `l`, clamped to `len` (the parameter, which
        # shadows the builtin inside this helper)
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # first context line before `l`, clamped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the next function-line search stops, last function
    # text found]; kept in a list so yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        # hunk is the [astart, a2, bstart, b2, delta] list built in the main
        # loop below; trailing context lines are added here
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    # the function text is cut to 40 characters
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # `hunk` is the hunk being accumulated (None until the first change);
    # it is only emitted once the next change cannot be merged into it.
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        # slice before b1/b2 are shifted below: l2 is indexed with the
        # unadjusted offsets
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            # keep appending to the joined hunk's own line list
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading context (or the lines separating this change from the
        # joined hunk), then removed lines, then added lines
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    # emit the hunk still pending after the last change
    if hunk:
        for x in yieldhunk(hunk):
            yield x
377 379
def b85diff(to, tn):
    '''return a base85-encoded "GIT binary patch" going from to to tn

    None is treated as empty content. An empty string is returned when
    both contents are equal; otherwise the result carries the whole new
    content as a zlib-compressed "literal".
    '''
    def fmtline(line):
        # the first character encodes the line length: 'A' onwards for
        # lengths up to 26, 'a' onwards for longer ones
        size = len(line)
        if size > 26:
            code = chr(size - 26 + ord('a') - 1)
        else:
            code = chr(ord('A') + size - 1)
        return '%c%s\n' % (code, base85.b85encode(line, True))

    def chunk(text, csize=52):
        # successive slices of at most csize characters
        total = len(text)
        start = 0
        while start < total:
            yield text[start:start + csize]
            start += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    parts = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
    parts.extend(fmtline(piece) for piece in chunk(zlib.compress(tn)))
    parts.append('\n')

    return ''.join(parts)
412 414
def patchtext(bin):
    """Return the concatenated data of every fragment found in `bin`.

    `bin` is read as a sequence of records, each made of a 12-byte header
    of three big-endian longs followed by as many data bytes as the third
    header field says. Only that length field is used here.
    """
    fragments = []
    total = len(bin)
    offset = 0
    while offset < total:
        header = bin[offset:offset + 12]
        length = struct.unpack(">lll", header)[2]
        offset += 12
        fragments.append(bin[offset:offset + length])
        offset += length
    return "".join(fragments)
422 424
def patch(a, bin):
    """Apply the delta `bin` to the text `a` and return the result.

    With an empty `a`, the result is `bin` without its first 12 bytes
    (the trivial delta header); otherwise mpatch.patches does the work.
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
428 430
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff's matching blocks as (start in a, start in b, length)."""
    matches = []
    for a1, a2, b1, b2 in bdiff.blocks(a, b):
        matches.append((a1, b1, a2 - a1))
    return matches
432 434
def trivialdiffheader(length):
    """Return the packed (0, 0, length) header, or '' when length is zero."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
435 437
def replacediffheader(oldlen, newlen):
    """Return the packed (0, oldlen, newlen) header as big-endian longs."""
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
438 440
# Aliases exposing the mpatch and bdiff entry points under this module's
# namespace.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now