##// END OF EJS Templates
mdiff: add helper for making deltas which replace the full text of a revision...
Mike Edgar -
r24119:a5a06c9c default
parent child Browse files
Show More
@@ -1,372 +1,375
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, util, base85
10 10 import re, struct, zlib
11 11
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping each terminator.

    Unlike str.splitlines, no other character is treated as a line
    boundary. A final line that lacks a trailing newline is returned
    without one, and empty text yields an empty list.
    '''
    pieces = text.split('\n')
    # Text that ends in '\n' (or is empty) leaves an empty last piece,
    # which is not a line of its own.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
21 21
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Value used for every option that is not supplied (or supplied as None).
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
    }

    # Only the known option names may be set on an instance.
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Store every known option, falling back to the default when the
        keyword is absent or None; unknown keywords are ignored.

        context is converted with int(); util.Abort is raised when that
        conversion fails with ValueError.
        '''
        for name in self.__slots__:
            value = opts.get(name)
            setattr(self, name,
                    self.defaults[name] if value is None else value)

        rawcontext = self.context
        try:
            self.context = int(rawcontext)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % rawcontext)

    def copy(self, **kwargs):
        '''Return a new diffopts carrying this object's option values,
        with any options given in kwargs overriding them.'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)

# Shared instance holding only default values.
defaultopts = diffopts()
71 71
def wsclean(opts, text, blank=True):
    '''Return text normalized according to the whitespace options in opts.

    With opts.ignorews the text is passed through bdiff.fixws(text, 1);
    otherwise, with opts.ignorewsamount, through bdiff.fixws(text, 0).
    When blank is true and opts.ignoreblanklines is set, runs of newlines
    are collapsed to a single one and leading/trailing newlines removed.
    '''
    if opts.ignorews or opts.ignorewsamount:
        # ignorews wins when both options are set
        text = bdiff.fixws(text, 1 if opts.ignorews else 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    return text
80 80
def splitblock(base1, lines1, base2, lines2, opts):
    '''Yield ([a1, a2, b1, b2], type) pieces covering lines1 and lines2.

    The two line lists are expected to match except for interwoven blank
    lines. Each piece is either a run of matching non-blank lines
    (type '=') or a run of blank lines on either side (type '~'). base1
    and base2 are added to the offsets so that the yielded ranges are
    expressed in the caller's coordinates.
    '''
    # 1 marks a line that still has content after wsclean(), 0 a blank one
    flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
    flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
    end1 = len(flags1)
    end2 = len(flags2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        pos1, pos2 = start1, start2
        if (pos1 < end1 and flags1[pos1] != 0
            and pos2 < end2 and flags2[pos2] != 0):
            # Both sides start with content: consume the matching lines
            btype = '='
            while pos1 < end1 and flags1[pos1] == 1 and flags2[pos2] == 1:
                pos1 += 1
                pos2 += 1
        else:
            # One side is exhausted or blank: consume the blank lines
            btype = '~'
            while pos1 < end1 and flags1[pos1] == 0:
                pos1 += 1
            while pos2 < end2 and flags2[pos2] == 0:
                pos2 += 1
        yield [base1 + start1, base1 + pos1,
               base2 + start2, base2 + pos2], btype
        start1, start2 = pos1, pos2
106 106
def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
    """Yield (block, type) tuples, where block is a [a1, a2, b1, b2]
    line-range entry as produced by bdiff.blocks.

    type is '=' for blocks matching exactly one another (bdiff blocks),
    '!' for non-matching blocks and '~' for blocks that match only once
    blank lines have been filtered out (opts.ignoreblanklines).

    lines1 and lines2 are text1 and text2 split with splitnewlines(), if
    the caller already has them; they are computed on demand otherwise.
    opts defaults to defaultopts.

    NOTE: refine is accepted but is not consulted by this function.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # End offsets of the previous match on each side. Starting from zero
    # makes the gap before a match that begins at line 0 empty, so nothing
    # is emitted for it.
    aprev = bprev = 0
    for match in bdiff.blocks(text1, text2):
        gap = [aprev, match[0], bprev, match[2]]

        # bdiff sometimes gives huge matches past eof; an empty gap on both
        # sides is skipped, which also handles the first-match case above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            btype = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    btype = '~'
            yield gap, btype
        yield match, '='
        aprev, bprev = match[1], match[3]
148 148
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    '''Return the unified diff between texts a and b as one string.

    ad and bd are the dates shown in the '---' and '+++' header lines,
    fn1 and fn2 the file names (normalized with util.pconvert). An empty
    string is returned when both texts are empty, when binary contents
    are identical, or when there is nothing to report.

    A text that is None is shown as /dev/null in its header line. Unless
    opts.text is set, content detected by util.binary() only produces a
    "Binary file ... has changed" line.
    '''
    def datetag(date, fn=None):
        # Dates are only shown for non-git diffs when not disabled; without
        # a date, a file name containing a space still gets a tab.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""

    aprefix, bprefix = ('', '') if opts.noprefix else ('a/', 'b/')

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        out = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # old side is empty or missing: every line of b is an addition
        added = splitnewlines(b)
        if a is None:
            oldhdr = '--- /dev/null%s' % datetag(epoch)
        else:
            oldhdr = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        newhdr = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        out = [oldhdr, newhdr, "@@ -0,0 +1,%d @@\n" % len(added)]
        out.extend("+" + line for line in added)
    elif not b:
        # new side is empty or missing: every line of a is a removal
        removed = splitnewlines(a)
        oldhdr = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            newhdr = '+++ /dev/null%s' % datetag(epoch)
        else:
            newhdr = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        out = [oldhdr, newhdr, "@@ -1,%d +0,0 @@\n" % len(removed)]
        out.extend("-" + line for line in removed)
    else:
        out = list(_unidiff(a, b, splitnewlines(a), splitnewlines(b),
                            opts=opts))
        if not out:
            return ""

        out[0:0] = ["--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
                    "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))]

    # Mark every output line that does not end with a newline
    for idx, line in enumerate(out):
        if line[-1] != '\n':
            out[idx] += "\n\ No newline at end of file\n"

    return "".join(out)
208 208
# Generate the lines of a unified diff, without the file header.
# t1 and t2 are the texts to be diffed
# l1 and l2 are the same texts broken up into lines
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    def contextend(line, limit):
        # end of the trailing context, clamped to limit
        return min(line + opts.context, limit)

    def contextstart(line):
        # start of the leading context, clamped to the first line
        return max(line - opts.context, 0)

    # [position to stop the backwards search at, last function line found]
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (hstart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - hstart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(hstart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = hstart

        # zero-length hunk ranges report their start line as one less
        if alen:
            hstart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (hstart, alen,
                                           bstart, blen, func)
        for line in delta:
            yield line
        # trailing context after the last change of the hunk
        for idx in xrange(a2, aend):
            yield ' ' + l1[idx]

    # allblocks gives us the matching and non-matching sequences in the
    # files. The loop below turns the non-matching ones into hunks, joining
    # neighbours whose contexts touch.
    hunk = None
    ignoredlines = 0
    for block, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = block
        if stype == '~':
            # The diff context lines are based on t1 content. When
            # blank lines are ignored, the new lines offsets must
            # be adjusted as if equivalent blocks ('~') had the
            # same sizes on both sides.
            ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        if stype != '!':
            continue

        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)

        if hunk and astart < hunk[1] + opts.context + 1:
            # this change falls inside the previous hunk's context: join
            # them, continuing the context from where that hunk ended and
            # recording the new ending points.
            astart = hunk[1]
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            if hunk:
                # the previous hunk is complete, emit it
                for line in yieldhunk(hunk):
                    yield line
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend(' ' + line for line in l1[astart:a1])
        delta.extend('-' + line for line in old)
        delta.extend('+' + line for line in new)

    if hunk:
        for line in yieldhunk(hunk):
            yield line
311 311
def b85diff(to, tn):
    '''Return a base85-encoded "GIT binary patch" replacing to with tn.

    None is treated as empty content. An empty string is returned when
    both contents are equal. The patch is always a "literal" one carrying
    the whole zlib-compressed new content.
    '''
    def fmtline(line):
        # One letter encodes the length of the chunk: 'A'..'Z' for
        # 1..26 and 'a' onwards for 27 and more.
        size = len(line)
        if size > 26:
            marker = chr(size - 26 + ord('a') - 1)
        else:
            marker = chr(ord('A') + size - 1)
        return '%c%s\n' % (marker, base85.b85encode(line, True))

    def chunk(text, csize=52):
        # yield successive csize-long slices of text
        total = len(text)
        offset = 0
        while offset < total:
            yield text[offset:offset + csize]
            offset += csize

    old = '' if to is None else to
    if tn is None:
        tn = ''

    if old == tn:
        return ''

    # TODO: deltas
    out = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
    out.extend(fmtline(piece) for piece in chunk(zlib.compress(tn)))
    out.append('\n')

    return ''.join(out)
346 346
def patchtext(bin):
    '''Return the concatenation of the data carried by each fragment of
    the binary delta bin.

    Each fragment starts with a 12-byte header of three big-endian
    32-bit integers; only the third one, the length of the data that
    follows the header, is used here.
    '''
    fragments = []
    offset = 0
    while offset < len(bin):
        _p1, _p2, length = struct.unpack(">lll", bin[offset:offset + 12])
        datastart = offset + 12
        offset = datastart + length
        fragments.append(bin[datastart:offset])
    return "".join(fragments)
356 356
def patch(a, bin):
    '''Apply the binary delta bin to the text a.

    When a is empty the result is bin without its first 12 bytes (the
    trivial delta header), returned as a util.buffer; otherwise the
    delta is applied with mpatch.patches.
    '''
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
362 362
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return a list of (start in a, start in b, length) triples, one per
    block reported by bdiff.blocks(a, b).'''
    triples = []
    for block in bdiff.blocks(a, b):
        astart, aend, bstart = block[0], block[1], block[2]
        triples.append((astart, bstart, aend - astart))
    return triples
366 366
def trivialdiffheader(length):
    '''Return the 12-byte delta header (0, 0, length), packed as three
    big-endian 32-bit integers.'''
    start = end = 0
    return struct.pack(">lll", start, end, length)
369 369
def replacediffheader(oldlen, newlen):
    '''Return the 12-byte delta header (0, oldlen, newlen), packed as
    three big-endian 32-bit integers.'''
    start = 0
    return struct.pack(">lll", start, oldlen, newlen)
372
# Module-level aliases that expose the mpatch and bdiff functions under
# this module's namespace.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now