##// END OF EJS Templates
py3: slice on bytes instead of indexing...
Pulkit Goyal -
r35601:2f123f30 default
parent child Browse files
Show More
@@ -1,492 +1,492 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 policy,
18 18 pycompat,
19 19 util,
20 20 )
21 21
# Load the low-level diff/patch primitives through the policy module,
# which selects the C or pure-Python implementation at import time
# (the r'' prefix keeps the module name a native str — presumably to
# survive the py3 bytes source transformer; confirm against policy.py).
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Re-export the primitives under stable module-level names so callers
# can use mdiff.blocks(), mdiff.patches(), etc. directly.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
30 30
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    # split() strips the separators, so glue a '\n' back onto every
    # piece, then repair the final piece, which never had one.
    pieces = []
    for chunk in text.split('\n'):
        pieces.append(chunk + '\n')
    if pieces:
        last = pieces[-1]
        if last == '\n':
            # input ended in '\n': split produced a spurious empty tail
            del pieces[-1]
        else:
            # drop the newline we wrongly appended to the last piece
            pieces[-1] = last[:-1]
    return pieces
40 40
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        # Every key in `defaults` becomes an instance attribute; a value
        # of None (or a missing key) falls back to its default.  Unknown
        # keys in opts are silently ignored.
        for name in self.defaults:
            value = opts.get(name)
            if value is None:
                value = self.defaults[name]
            setattr(self, name, value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        """Return a new diffopts with the current settings plus overrides."""
        newopts = dict((k, getattr(self, k)) for k in self.defaults)
        newopts = pycompat.strkwargs(newopts)
        newopts.update(kwargs)
        return diffopts(**newopts)
92 92
# Shared module-level instance used whenever a caller does not supply
# explicit diff options.
defaultopts = diffopts()
94 94
def wsclean(opts, text, blank=True):
    """Normalize whitespace in `text` according to the flags on `opts`.

    Passing blank=False suppresses the blank-line collapsing even when
    opts.ignoreblanklines is set (used when context lines must stay
    aligned with the original text).
    """
    cleaned = text
    # ignorews takes precedence over ignorewsamount (the fixws mode
    # flag presumably selects strip-all vs collapse — see bdiff.fixws)
    if opts.ignorews:
        cleaned = bdiff.fixws(cleaned, 1)
    elif opts.ignorewsamount:
        cleaned = bdiff.fixws(cleaned, 0)
    if blank and opts.ignoreblanklines:
        cleaned = re.sub('\n+', '\n', cleaned).strip('\n')
    if opts.ignorewseol:
        cleaned = re.sub(r'[ \t\r\f]+\n', r'\n', cleaned)
    return cleaned
105 105
def splitblock(base1, lines1, base2, lines2, opts):
    """Split one matched block pair into matching and blank sub-blocks.

    Yields ``([a1, a2, b1, b2], btype)`` entries where the bounds are
    offset by base1/base2, btype is '=' for a run of lines with content
    on both sides and '~' for a run of blank (whitespace-only after
    wsclean()) lines.

    NOTE(review): relies on the caller's guarantee that the two sides
    match except for interwoven blank lines; an unbalanced '=' run
    could index lines2 past its end — confirm against callers.
    """
    # The input lines matches except for interwoven blank lines. We
    # transform it into a sequence of matching blocks and blank blocks.
    # Each line is reduced to 1 (has content after cleaning) or 0 (blank).
    lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
    lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, '='
        if (i1 >= e1 or lines1[i1] == 0
            or i2 >= e2 or lines2[i2] == 0):
            # Consume the block of blank lines
            btype = '~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
131 131
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    # Two half-open intervals [start, end) and [lowerbound, upperbound)
    # overlap iff each begins strictly before the other ends.
    end = start + length
    return lowerbound < end and start < upperbound
156 156
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.
    """
    lbb, ubb = rangeb
    # bounds of the translated a-side range; stay None until some block
    # covers the corresponding b-side bound
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == '=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            # translate each bound independently; for '=' blocks the
            # offset inside the block is preserved, for other block
            # types the whole a-side span of the block is used
            if b1 <= lbb < b2:
                if stype == '=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                if stype == '=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
        # keep any block overlapping rangeb on the b side
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    # no block covered one of the bounds: rangeb lies (partly) past EOF
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return filteredblocks, (lba, uba)
195 195
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    line1 and line2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        # normalize whitespace before diffing, but keep blank lines
        # (blank=False) so line offsets stay meaningful
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file. If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # s becomes the gap between the previous match and this one
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            type = '!'
            if opts.ignoreblanklines:
                # lazily split the texts only when actually needed
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                # a gap whose content matches after whitespace cleaning
                # is downgraded from '!' to '~'
                old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
                new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
                if old == new:
                    type = '~'
            yield s, type
        yield s1, '='
236 236
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    `ad`/`bd` are the date strings for the two sides; a file side passed
    as None is treated as absent (/dev/null header).
    """
    def datetag(date, fn=None):
        # dates are dropped for git diffs and when nodates is set; a lone
        # tab is still emitted for filenames containing a space
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    # the "no diff" result: empty headers list, empty hunks tuple
    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    def checknonewline(lines):
        # annotate any line lacking its trailing newline, diff(1)-style
        for text in lines:
            if text[-1:] != '\n':
                text += "\n\ No newline at end of file\n"
            yield text

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        # trailing comma: hunks is a one-element tuple
        hunks = (None, ['Binary file %s has changed\n' % fn1]),
    elif not a:
        # file added (or previously empty): one all-additions hunk
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        hunks = (hunkrange, checknonewline(hunklines)),
    elif not b:
        # file removed (or emptied): one all-deletions hunk
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        hunks = (hunkrange, checknonewline(hunklines)),
    else:
        diffhunks = _unidiff(a, b, opts=opts)
        try:
            # pull the first hunk eagerly: no hunks means no diff at all
            hunkrange, hunklines = next(diffhunks)
        except StopIteration:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]
        def rewindhunks():
            # re-emit the hunk consumed above, then stream the rest
            yield hunkrange, checknonewline(hunklines)
            for hr, hl in diffhunks:
                yield hr, checknonewline(hl)

        hunks = rewindhunks()

    return headerlines, hunks
321 321
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, len):
        # end of the context window after line l, clamped to file length
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # start of the context window before line l, clamped to zero
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # mutable [position, text] pair shared across yieldhunk() calls so the
    # function-name scan (diff -p) never re-reads a line (see below)
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # previous hunk is complete: flush it before starting anew
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading context, then deletions, then additions
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    # flush the final pending hunk, if any
    if hunk:
        for x in yieldhunk(hunk):
            yield x
432 432
def b85diff(to, tn):
    '''print base85-encoded binary diff'''
    def fmtline(line):
        # git's binary patch format encodes each chunk's length as one
        # letter: A-Z for 1-26 bytes, a-z for 27-52 bytes
        n = len(line)
        if n <= 26:
            code = chr(ord('A') + n - 1)
        else:
            code = chr(n - 26 + ord('a') - 1)
        return '%c%s\n' % (code, util.b85encode(line, True))

    def chunk(text, csize=52):
        # yield successive csize-byte slices of text
        offset = 0
        total = len(text)
        while offset < total:
            yield text[offset:offset + csize]
            offset += csize

    # treat absent sides as empty content
    if to is None:
        to = ''
    if tn is None:
        tn = ''

    # identical content: no patch to emit
    if to == tn:
        return ''

    # TODO: deltas
    lines = ['GIT binary patch\n', 'literal %d\n' % len(tn)]
    for piece in chunk(zlib.compress(tn)):
        lines.append(fmtline(piece))
    lines.append('\n')
    return ''.join(lines)
467 467
def patchtext(bin):
    """Concatenate the data fragments of a binary mpatch delta.

    The delta is a sequence of records: a 12-byte ">lll" header followed
    by the fragment payload whose length is the header's third field
    (the first two fields are the replaced span and are ignored here).
    """
    fragments = []
    offset = 0
    end = len(bin)
    while offset < end:
        p1, p2, l = struct.unpack(">lll", bin[offset:offset + 12])
        offset += 12
        fragments.append(bin[offset:offset + l])
        offset += l
    return "".join(fragments)
477 477
def patch(a, bin):
    """Apply binary patch `bin` to text `a` and return the result."""
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # empty source: the delta is the full new text after a trivial
    # 12-byte header, so just expose everything past the header
    return util.buffer(bin, 12)
483 483
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff's matching blocks as (i, j, n) triples."""
    matches = []
    for a1, a2, b1, b2 in bdiff.blocks(a, b):
        matches.append((a1, b1, a2 - a1))
    return matches
487 487
def trivialdiffheader(length):
    """Delta header inserting `length` bytes at offset 0 ('' if empty)."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
490 490
def replacediffheader(oldlen, newlen):
    """Delta header replacing the first `oldlen` bytes with `newlen` new ones."""
    header = struct.pack(">lll", 0, oldlen, newlen)
    return header
General Comments 0
You need to be logged in to leave comments. Login now