##// END OF EJS Templates
py3: fix error string with bytestr() on repr()d value...
Pulkit Goyal -
r37388:b6de372b default
parent child Browse files
Show More
@@ -1,526 +1,527
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 policy,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from .utils import dateutil
23 23
# Marker line appended to a hunk right after the last line of a file that
# does not end with a newline (see unidiff() and _unidiff()).
_missing_newline_marker = "\\ No newline at end of file\n"

# The low-level diff and patch primitives are loaded through
# policy.importmod().
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Module-level aliases for the primitives exposed by bdiff and mpatch.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
splitnewlines = bdiff.splitnewlines
35 35
class diffopts(object):
    '''Container for the options that control diff generation.

    Every key of ``defaults`` becomes an instance attribute. The options
    documented so far are:

    context           the number of context lines
    text              treats all files as text
    showfunc          enables diff -p output
    git               enables the git extended patch format
    nodates           removes dates from diff headers
    nobinary          ignores binary files
    noprefix          disables the 'a/' and 'b/' prefixes (ignored in plain
                      mode)
    ignorews          ignores all whitespace changes in the diff
    ignorewsamount    ignores changes in the amount of whitespace
    ignoreblanklines  ignores changes whose lines are all blank
    upgrade           generates git diffs to avoid data loss
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
        'xdiff': False,
    }

    def __init__(self, **opts):
        """Set one attribute per known option.

        Options that are missing from `opts`, or passed as None, take their
        value from ``defaults``. ``context`` is converted with int();
        error.Abort is raised when that conversion fails with ValueError.
        """
        opts = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            value = opts.get(name)
            setattr(self, name, fallback if value is None else value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') %
                              pycompat.bytestr(self.context))

    def copy(self, **kwargs):
        """Return a new diffopts with the current values, overridden by
        `kwargs`."""
        opts = {name: getattr(self, name) for name in self.defaults}
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)
88 89
# diffopts instance built without arguments, so it carries only the values
# from diffopts.defaults; used as the default `opts` by the functions below.
defaultopts = diffopts()
90 91
def wsclean(opts, text, blank=True):
    """Return `text` normalized according to the whitespace options of `opts`.

    `text` is expected to be a bytes string (the end-of-line pattern below is
    a bytes pattern). The following steps are applied in order:

    * ``opts.ignorews``: ``bdiff.fixws(text, 1)``; otherwise, if
      ``opts.ignorewsamount`` is set: ``bdiff.fixws(text, 0)``;
    * ``blank`` and ``opts.ignoreblanklines``: runs of newlines are collapsed
      into a single newline, and leading/trailing newlines are stripped;
    * ``opts.ignorewseol``: spaces, tabs, carriage returns and form feeds
      directly preceding a newline are removed.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub(b'\n+', b'\n', text).strip(b'\n')
    if opts.ignorewseol:
        # The replacement has to be bytes like the pattern: re.sub() refuses
        # to mix a bytes pattern with a native str replacement on Python 3.
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
101 102
def splitblock(base1, lines1, base2, lines2, opts):
    """Split two line sequences that only differ by interwoven blank lines.

    Yields ``([a1, a2, b1, b2], btype)`` items, offset by `base1`/`base2`,
    where `btype` is '=' for a run of lines that are non-blank on both sides
    and '~' for a run of blank lines. A line counts as blank when
    wsclean(opts, line) is empty.
    """
    nonblank1 = [bool(wsclean(opts, line)) for line in lines1]
    nonblank2 = [bool(wsclean(opts, line)) for line in lines2]
    end1, end2 = len(nonblank1), len(nonblank2)
    pos1 = pos2 = 0
    while pos1 < end1 or pos2 < end2:
        cur1, cur2 = pos1, pos2
        if (cur1 < end1 and nonblank1[cur1]
            and cur2 < end2 and nonblank2[cur2]):
            # Both sides sit on a non-blank line: consume the matching lines
            btype = '='
            while cur1 < end1 and nonblank1[cur1] and nonblank2[cur2]:
                cur1 += 1
                cur2 += 1
        else:
            # At least one side is exhausted or blank: consume the blank
            # lines of both sides
            btype = '~'
            while cur1 < end1 and not nonblank1[cur1]:
                cur1 += 1
            while cur2 < end2 and not nonblank2[cur2]:
                cur2 += 1
        yield [base1 + pos1, base1 + cur1, base2 + pos2, base2 + cur2], btype
        pos1, pos2 = cur1, cur2
127 128
def hunkinrange(hunk, linerange):
    """Tell whether `hunk`, given as (start, length), overlaps `linerange`,
    given as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    hunkstart, hunklength = hunk
    low, high = linerange
    hunkend = hunkstart + hunklength
    return low < hunkend and hunkstart < high
152 153
def blocksinrange(blocks, rangeb):
    """Keep the `blocks` overlapping line range `rangeb` on the "b" side.

    `blocks` items look like ``((a1, a2, b1, b2), stype)``.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is the list of items of `blocks` that are inside
      `rangeb` from the ``(b1, b2)`` point of view; a block ``(b1, b2)``
      being inside `rangeb` if ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. the ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort when no "a" side bound could be determined or when the
    computed upper bound is below the lower bound.
    """
    lowerb, upperb = rangeb
    lowera = uppera = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == '='
        if matching and lowerb >= b1 and upperb <= b2:
            # rangeb is within a single "=" hunk: map it back to the "a"
            # side by offsetting it
            lowera = lowerb - b1 + a1
            uppera = upperb - b1 + a1
        else:
            if b1 <= lowerb < b2:
                # the lower bound falls inside this block
                lowera = a2 - (b2 - lowerb) if matching else a1
            if b1 < upperb <= b2:
                # the upper bound falls inside this block
                uppera = a1 + (upperb - b1) if matching else a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            kept.append(block)
    if lowera is None or uppera is None or uppera < lowera:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowera, uppera)
191 192
def chooseblocksfunc(opts=None):
    """Return the function used to compute matching blocks.

    bdiff.xdiffblocks is picked only when `opts` is given, ``opts.xdiff`` is
    set and bdiff provides ``xdiffblocks``; bdiff.blocks is used otherwise.
    """
    wantxdiff = (opts is not None and opts.xdiff
                 and util.safehasattr(bdiff, 'xdiffblocks'))
    if wantxdiff:
        return bdiff.xdiffblocks
    return bdiff.blocks
198 199
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) tuples covering text1 and text2.

    `block` is an mdiff.blocks line entry. `type` is '=' for blocks matching
    exactly one another (bdiff blocks), '!' for non-matching blocks and '~'
    for blocks matching only after having filtered blank lines.
    `lines1` and `lines2` are text1 and text2 split with splitnewlines(), if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    matches = chooseblocksfunc(opts)(text1, text2)
    # The first match is special: there is no previous match, so act as if an
    # empty one ended at line 0 of both texts. If the first match starts at
    # line 0, the gap computed below is empty and nothing is yielded for it.
    previous = [0, 0, 0, 0]
    for match in matches:
        # lines between the end of the previous match and the start of this
        # one, on both sides
        gap = [previous[1], match[0], previous[3], match[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            btype = '!'
            if opts.ignoreblanklines:
                # split lazily, only once a comparison is actually needed
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    btype = '~'
            yield gap, btype
        yield match, '='
        previous = match
239 240
def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
    """Build a unified diff and return it as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with the unified diff header
    lines "--- <original>" and "+++ <new>", and `hunks` is an iterable of
    (hunkrange, hunklines) items (a generator coming from _unidiff() when
    both texts are non-empty). Otherwise, `headers` and `hunks` are empty.

    `a`/`b` are the old/new contents, `ad`/`bd` their dates and `fn1`/`fn2`
    their file names. Set binary=True if either a or b should be taken as a
    binary file; the headers are then empty and the single hunk only says
    that the file has changed.
    """
    def datetag(date, fn=None):
        # dates are only shown for non-git diffs that did not disable them
        if not (opts.git or opts.nodates):
            return '\t%s' % date
        # without a date, file names containing a space still get a tab
        return '\t' if fn and ' ' in fn else ''

    nodiff = [], ()
    if not (a or b):
        return nodiff

    aprefix, bprefix = ('', '') if opts.noprefix else ('a/', 'b/')

    epoch = dateutil.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return nodiff
        return [], ((None, ['Binary file %s has changed\n' % fn1]),)

    if not a:
        # nothing on the old side: every line of b is an addition
        nonewline = not b.endswith('\n')
        added = splitnewlines(b)
        if a is None:
            oldheader = '--- /dev/null%s' % datetag(epoch)
        else:
            oldheader = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        newheader = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        count = len(added)
        hunklines = ["@@ -0,0 +1,%d @@\n" % count]
        hunklines.extend("+" + line for line in added)
        if nonewline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        return [oldheader, newheader], (((0, 0, 1, count), hunklines),)

    if not b:
        # nothing on the new side: every line of a is a removal
        nonewline = not a.endswith('\n')
        removed = splitnewlines(a)
        oldheader = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            newheader = '+++ /dev/null%s' % datetag(epoch)
        else:
            newheader = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        count = len(removed)
        hunklines = ["@@ -1,%d +0,0 @@\n" % count]
        hunklines.extend("-" + line for line in removed)
        if nonewline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        return [oldheader, newheader], (((1, count, 0, 0), hunklines),)

    hunks = _unidiff(a, b, opts=opts)
    # the first item produced by _unidiff() tells whether any hunk follows
    if not next(hunks):
        return nodiff

    headerlines = [
        "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
        "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
    ]
    return headerlines, hunks
320 321
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: the first item yielded is True when
    at least one hunk follows, and False when there is no hunk at all.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, len):
        """Return `l` plus the context size, capped at `len`."""
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        """Return `l` minus the context size, but never less than 0."""
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [line where the backwards search for a function line stops, last
    # function line found]; a list so that yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        """Yield the single (hunkrange, hunklines) item built from `hunk`, a
        [astart, a2, bstart, b2, delta] list."""
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the trailing context (aend - a2 lines) is counted on both sides
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = b' ' + l1[i].rstrip()
                    # split long function name if ASCII. otherwise we have no
                    # idea where the multi-byte boundary is, so just leave it.
                    if encoding.isasciistr(func):
                        func = func[:41]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith('\n') and astart + alen == len(l1) + 1:
            # search backwards for the last line coming from t1 (removed or
            # context) and put the marker right after it
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith(('-', ' ')):
                    if hunklines[i].startswith(' '):
                        skip = True
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if not skip and not t2.endswith('\n') and bstart + blen == len(l2) + 1:
            # same for the last added line, which comes from t2
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith('+'):
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # pending hunk as [astart, a2, bstart, b2, delta]; it is only emitted once
    # the next difference turns out to be too far away to be joined with it
    hunk = None
    ignoredlines = 0
    # becomes True once the leading `True` item has been yielded
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        # slice before b1/b2 are shifted below: l2 is indexed with the real
        # offsets
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # append leading context, then the removed and the added lines
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    # emit the last pending hunk, or report that there was none at all
    if hunk:
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        yield False
466 467
def b85diff(to, tn):
    '''Return a base85-encoded "GIT binary patch" carrying the new content.

    `to` and `tn` are the old and new contents; None is treated as ''. An
    empty string is returned when both contents are equal. Otherwise the
    patch is a "literal" one: the zlib-compressed new content, base85-encoded
    in chunks of at most 52 bytes, each line being prefixed with a character
    encoding the chunk length.
    '''
    def fmtline(line):
        size = len(line)
        # lengths 1..26 map to 'A'..'Z', larger ones continue from 'a'
        if size > 26:
            prefix = pycompat.bytechr(size - 26 + ord('a') - 1)
        else:
            prefix = pycompat.bytechr(ord('A') + size - 1)
        return '%c%s\n' % (prefix, util.b85encode(line, True))

    def chunk(text, csize=52):
        offset, total = 0, len(text)
        while offset < total:
            yield text[offset:offset + csize]
            offset += csize

    old = '' if to is None else to
    new = '' if tn is None else tn

    if old == new:
        return ''

    # TODO: deltas
    pieces = ['GIT binary patch\n', 'literal %d\n' % len(new)]
    pieces.extend(fmtline(part) for part in chunk(zlib.compress(new)))
    pieces.append('\n')

    return ''.join(pieces)
501 502
def patchtext(bin):
    """Return the concatenated data fragments carried by the binary delta
    `bin`.

    `bin` is read as a sequence of records, each made of a 12-byte header
    (three big-endian signed 32-bit integers) followed by a payload whose
    length is the third integer. The first two integers are not used here.
    The result is a bytes string, empty when `bin` is empty.

    struct.error is raised if a header is truncated.
    """
    pos = 0
    fragments = []
    end = len(bin)
    while pos < end:
        _p1, _p2, length = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        fragments.append(bin[pos:pos + length])
        pos += length
    # The fragments are slices of a bytes string, so they must be joined with
    # a bytes separator; a native str separator fails on Python 3.
    return b"".join(fragments)
511 512
def patch(a, bin):
    """Apply the binary delta `bin` to the text `a`.

    With a non-empty `a` the delta is applied through mpatch.patches(). With
    an empty `a` the result is a buffer over `bin` starting at offset 12,
    i.e. skipping over the trivial delta header.
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
517 518
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return the bdiff.blocks(a, b) matches as (start in a, start in b,
    length) triples."""
    triples = []
    for block in bdiff.blocks(a, b):
        starta, enda, startb = block[0], block[1], block[2]
        triples.append((starta, startb, enda - starta))
    return triples
521 522
def trivialdiffheader(length):
    """Return the 12-byte delta header packing (0, 0, length) as three
    big-endian signed 32-bit integers, or an empty bytes string when `length`
    is zero.

    The empty case is spelled as a bytes literal so that both branches return
    the same type on Python 3.
    """
    return struct.pack(">lll", 0, 0, length) if length else b''
524 525
def replacediffheader(oldlen, newlen):
    """Return the 12-byte delta header packing (0, oldlen, newlen) as three
    big-endian signed 32-bit integers."""
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
General Comments 0
You need to be logged in to leave comments. Login now