##// END OF EJS Templates
mdiff: split lines in allblocks() only when necessary...
Patrick Mezard -
r15529:b35cf472 default
parent child Browse files
Show More
@@ -1,336 +1,334
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, util
10 10 import re, struct
11 11
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping the terminators.

    Unlike str.splitlines, no other character is treated as a line
    boundary. A final piece without a trailing newline is returned
    as-is; an empty text gives an empty list.
    '''
    pieces = text.split('\n')
    # whatever follows the last newline (possibly nothing at all)
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
21 21
class diffopts(object):
    '''Container for the options controlling diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # value used for every option that is missing or passed as None
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        }

    # only the known option names may be set on an instance
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Set each known option from opts, falling back to defaults.

        Keyword arguments that are not known option names are ignored.
        Raises util.Abort if context cannot be converted by int().
        '''
        for name in self.__slots__:
            value = opts.get(name)
            if value is None:
                value = self.defaults[name]
            setattr(self, name, value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts equal to this one, with the options
        given in kwargs overridden.'''
        merged = dict((name, getattr(self, name)) for name in self.defaults)
        merged.update(kwargs)
        return diffopts(**merged)

# shared instance carrying only default values
defaultopts = diffopts()
67 67
def wsclean(opts, text, blank=True):
    '''Return text normalized according to the whitespace options.

    opts.ignorews removes every run of spaces, tabs and carriage
    returns; otherwise opts.ignorewsamount collapses each such run to
    a single space and drops a space left just before a newline.
    When blank is true and opts.ignoreblanklines is set, runs of
    newlines are collapsed to one and leading/trailing newlines are
    stripped.
    '''
    if opts.ignorews:
        cleaned = re.sub('[ \t\r]+', '', text)
    elif opts.ignorewsamount:
        cleaned = re.sub('[ \t\r]+', ' ', text).replace(' \n', '\n')
    else:
        cleaned = text
    if not (blank and opts.ignoreblanklines):
        return cleaned
    return re.sub('\n+', '\n', cleaned).strip('\n')
77 77
def splitblock(base1, lines1, base2, lines2, opts):
    '''Yield (block, type) pairs covering lines1 and lines2.

    The input lines are expected to match except for interwoven blank
    lines. They are turned into a sequence of matching blocks (type
    '=') and blank blocks (type '~'). Each block is a list
    [start1, end1, start2, end2] with base1/base2 added to the
    offsets within lines1/lines2.
    '''
    # True marks a line that still has content after wsclean(),
    # False a line that cleans down to nothing
    solid1 = [bool(wsclean(opts, l)) for l in lines1]
    solid2 = [bool(wsclean(opts, l)) for l in lines2]
    end1, end2 = len(solid1), len(solid2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        pos1, pos2 = start1, start2
        if (pos1 < end1 and solid1[pos1]
            and pos2 < end2 and solid2[pos2]):
            # Consume the matching lines
            kind = '='
            while pos1 < end1 and solid1[pos1] and solid2[pos2]:
                pos1 += 1
                pos2 += 1
        else:
            # Consume the block of blank lines on both sides
            kind = '~'
            while pos1 < end1 and not solid1[pos1]:
                pos1 += 1
            while pos2 < end2 and not solid2[pos2]:
                pos2 += 1
        block = [base1 + start1, base1 + pos1, base2 + start2, base2 + pos2]
        yield block, kind
        start1, start2 = pos1, pos2
103 103
def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.

    lines1 and lines2 are text1 and text2 split with splitnewlines()
    if they are already available. When they are not supplied they
    are computed here, and only once a non-matching block has to be
    examined with opts.ignoreblanklines set.

    opts defaults to defaultopts. refine is accepted but is not
    consulted by this implementation.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file. If it starts at line 0, the gap computed below is
    # empty on both sides and nothing is yielded for it.
    prev = [0, 0, 0, 0]
    for match in diff:
        # the region between the end of the previous match and the
        # start of this one
        gap = [prev[1], match[0], prev[3], match[2]]

        # skip gaps that are empty on both sides; this also deals with
        # the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            btype = '!'
            if opts.ignoreblanklines:
                # split lazily: most callers never reach this point.
                # text1/text2 may already be whitespace-cleaned here.
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    btype = '~'
            yield gap, btype
        yield match, '='
        prev = match
def diffline(revs, a, b, opts):
    '''Return the "diff ..." header line for a file pair.

    With opts.git the line is "diff --git a/<a> b/<b>" and revs is
    not used. Otherwise it is "diff", one "-r <rev>" per entry of
    revs (if any), then a; b does not appear. The result ends with a
    newline.
    '''
    if opts.git:
        words = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
    else:
        words = ['diff']
        if revs:
            words.append(' '.join(["-r %s" % rev for rev in revs]))
        words.append(a)
    return ' '.join(words) + '\n'
160 158
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return a unified diff between texts a and b as one string.

    a, b     old and new contents; a side that is None (rather than
             merely empty) is shown as /dev/null
    ad, bd   dates written on the "---" / "+++" header lines unless
             opts.git or opts.nodates is set
    fn1, fn2 old and new file names, passed through util.pconvert()
    r        if true, a diffline(r, fn1, fn2, opts) line is prepended
    opts     diff options (text, git, nodates and whatever _unidiff
             reads)

    An empty string is returned when both texts are empty, when the
    contents are considered binary but are non-empty and equal, or
    when _unidiff() produces no output. For content considered
    binary (and opts.text unset) the body is a single "Binary file
    ... has changed" line without "---" / "+++" headers.
    '''
    def datetag(date, addtab=True):
        # trailer appended after a file name on a header line
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        # no date wanted: still emit a lone tab when fn1 contains a space
        if addtab and ' ' in fn1:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # old side empty or missing: everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch, False)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # new side empty or missing: everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch, False)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
        l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))

    # any output line lacking a final newline gets the marker appended;
    # the backslash is escaped explicitly instead of relying on "\ "
    # being left alone as an unrecognized escape sequence
    for ln, line in enumerate(l):
        if line[-1] != '\n':
            l[ln] = line + "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
216 214
# creates a headerless unified diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    '''Yield the lines of a unified diff body (hunk headers, context,
    removed and added lines) for t1 against t2.

    Only the blocks that allblocks() reports with type '!' produce
    output. opts.context gives the number of context lines around
    each change and opts.showfunc adds a trailing label to the hunk
    headers. Nothing is yielded when there is no '!' block.
    '''
    def contextend(l, len):
        # last context line after position l, clamped to len
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # first context line before position l, clamped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the next backwards search stops, last label found];
    # a list so that yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        # hunk is [astart, a2, bstart, b2, delta] as built in the main
        # loop below; delta holds the already prefixed body lines
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the trailing context (aend - a2 lines) is counted on both sides
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    # label is the line, right-stripped, cut to 40 chars
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                           bstart, blen, func)
        for x in delta:
            yield x
        # trailing context lines
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        if stype != '!':
            continue
        delta = []
        a1, a2, b1, b2 = s
        old = l1[a1:a2]
        new = l2[b1:b2]

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                # continue right where the previous change ended
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # too far apart: the pending hunk is complete, emit it
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading (or in-between) context, then removals, then additions
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    # emit the hunk still pending after the last change, if any
    if hunk:
        for x in yieldhunk(hunk):
            yield x
310 308
def patchtext(bin):
    '''Return the concatenated data of every fragment in a delta.

    bin is read as a sequence of fragments, each made of three
    big-endian 32-bit signed integers followed by as many bytes of
    data as the third integer says. Only that data is kept.
    '''
    hdrsize = 12
    total = len(bin)
    chunks = []
    offset = 0
    while offset < total:
        header = bin[offset:offset + hdrsize]
        # the first two header fields are not needed here
        length = struct.unpack(">lll", header)[2]
        offset += hdrsize
        chunks.append(bin[offset:offset + length])
        offset += length
    return "".join(chunks)
320 318
def patch(a, bin):
    '''Apply the delta bin to the text a and return the result.

    For an empty a no patching is done: a buffer over bin starting
    after its first 12 bytes (the trivial delta header) is returned.
    Otherwise the work is delegated to mpatch.patches().
    '''
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return buffer(bin, 12)
326 324
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return a list of (start in a, start in b, length) triples, one
    per block reported by bdiff.blocks(a, b).'''
    triples = []
    for blk in bdiff.blocks(a, b):
        astart, aend, bstart = blk[0], blk[1], blk[2]
        triples.append((astart, bstart, aend - astart))
    return triples
330 328
def trivialdiffheader(length):
    '''Return a 12-byte delta header: three big-endian 32-bit signed
    integers, the first two zero and the third set to length.'''
    first = second = 0
    return struct.pack(">lll", first, second, length)
333 331
# module-level aliases exposing mpatch and bdiff entry points under
# this module's namespace
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now