##// END OF EJS Templates
mdiff: split lines in allblocks() only when necessary...
Patrick Mezard -
r15529:b35cf472 default
parent child Browse files
Show More
@@ -1,336 +1,334 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, util
9 import bdiff, mpatch, util
10 import re, struct
10 import re, struct
11
11
12 def splitnewlines(text):
12 def splitnewlines(text):
13 '''like str.splitlines, but only split on newlines.'''
13 '''like str.splitlines, but only split on newlines.'''
14 lines = [l + '\n' for l in text.split('\n')]
14 lines = [l + '\n' for l in text.split('\n')]
15 if lines:
15 if lines:
16 if lines[-1] == '\n':
16 if lines[-1] == '\n':
17 lines.pop()
17 lines.pop()
18 else:
18 else:
19 lines[-1] = lines[-1][:-1]
19 lines[-1] = lines[-1][:-1]
20 return lines
20 return lines
21
21
22 class diffopts(object):
22 class diffopts(object):
23 '''context is the number of context lines
23 '''context is the number of context lines
24 text treats all files as text
24 text treats all files as text
25 showfunc enables diff -p output
25 showfunc enables diff -p output
26 git enables the git extended patch format
26 git enables the git extended patch format
27 nodates removes dates from diff headers
27 nodates removes dates from diff headers
28 ignorews ignores all whitespace changes in the diff
28 ignorews ignores all whitespace changes in the diff
29 ignorewsamount ignores changes in the amount of whitespace
29 ignorewsamount ignores changes in the amount of whitespace
30 ignoreblanklines ignores changes whose lines are all blank
30 ignoreblanklines ignores changes whose lines are all blank
31 upgrade generates git diffs to avoid data loss
31 upgrade generates git diffs to avoid data loss
32 '''
32 '''
33
33
34 defaults = {
34 defaults = {
35 'context': 3,
35 'context': 3,
36 'text': False,
36 'text': False,
37 'showfunc': False,
37 'showfunc': False,
38 'git': False,
38 'git': False,
39 'nodates': False,
39 'nodates': False,
40 'ignorews': False,
40 'ignorews': False,
41 'ignorewsamount': False,
41 'ignorewsamount': False,
42 'ignoreblanklines': False,
42 'ignoreblanklines': False,
43 'upgrade': False,
43 'upgrade': False,
44 }
44 }
45
45
46 __slots__ = defaults.keys()
46 __slots__ = defaults.keys()
47
47
48 def __init__(self, **opts):
48 def __init__(self, **opts):
49 for k in self.__slots__:
49 for k in self.__slots__:
50 v = opts.get(k)
50 v = opts.get(k)
51 if v is None:
51 if v is None:
52 v = self.defaults[k]
52 v = self.defaults[k]
53 setattr(self, k, v)
53 setattr(self, k, v)
54
54
55 try:
55 try:
56 self.context = int(self.context)
56 self.context = int(self.context)
57 except ValueError:
57 except ValueError:
58 raise util.Abort(_('diff context lines count must be '
58 raise util.Abort(_('diff context lines count must be '
59 'an integer, not %r') % self.context)
59 'an integer, not %r') % self.context)
60
60
61 def copy(self, **kwargs):
61 def copy(self, **kwargs):
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
63 opts.update(kwargs)
63 opts.update(kwargs)
64 return diffopts(**opts)
64 return diffopts(**opts)
65
65
66 defaultopts = diffopts()
66 defaultopts = diffopts()
67
67
68 def wsclean(opts, text, blank=True):
68 def wsclean(opts, text, blank=True):
69 if opts.ignorews:
69 if opts.ignorews:
70 text = re.sub('[ \t\r]+', '', text)
70 text = re.sub('[ \t\r]+', '', text)
71 elif opts.ignorewsamount:
71 elif opts.ignorewsamount:
72 text = re.sub('[ \t\r]+', ' ', text)
72 text = re.sub('[ \t\r]+', ' ', text)
73 text = text.replace(' \n', '\n')
73 text = text.replace(' \n', '\n')
74 if blank and opts.ignoreblanklines:
74 if blank and opts.ignoreblanklines:
75 text = re.sub('\n+', '\n', text).strip('\n')
75 text = re.sub('\n+', '\n', text).strip('\n')
76 return text
76 return text
77
77
78 def splitblock(base1, lines1, base2, lines2, opts):
78 def splitblock(base1, lines1, base2, lines2, opts):
79 # The input lines match except for interwoven blank lines. We
79 # The input lines match except for interwoven blank lines. We
80 # transform it into a sequence of matching blocks and blank blocks.
80 # transform it into a sequence of matching blocks and blank blocks.
81 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
81 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
82 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
82 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
83 s1, e1 = 0, len(lines1)
83 s1, e1 = 0, len(lines1)
84 s2, e2 = 0, len(lines2)
84 s2, e2 = 0, len(lines2)
85 while s1 < e1 or s2 < e2:
85 while s1 < e1 or s2 < e2:
86 i1, i2, btype = s1, s2, '='
86 i1, i2, btype = s1, s2, '='
87 if (i1 >= e1 or lines1[i1] == 0
87 if (i1 >= e1 or lines1[i1] == 0
88 or i2 >= e2 or lines2[i2] == 0):
88 or i2 >= e2 or lines2[i2] == 0):
89 # Consume the block of blank lines
89 # Consume the block of blank lines
90 btype = '~'
90 btype = '~'
91 while i1 < e1 and lines1[i1] == 0:
91 while i1 < e1 and lines1[i1] == 0:
92 i1 += 1
92 i1 += 1
93 while i2 < e2 and lines2[i2] == 0:
93 while i2 < e2 and lines2[i2] == 0:
94 i2 += 1
94 i2 += 1
95 else:
95 else:
96 # Consume the matching lines
96 # Consume the matching lines
97 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
97 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
98 i1 += 1
98 i1 += 1
99 i2 += 1
99 i2 += 1
100 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
100 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
101 s1 = i1
101 s1 = i1
102 s2 = i2
102 s2 = i2
103
103
104 def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
104 def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
105 """Return (block, type) tuples, where block is an mdiff.blocks
105 """Return (block, type) tuples, where block is an mdiff.blocks
106 line entry. type is '=' for blocks matching exactly one another
106 line entry. type is '=' for blocks matching exactly one another
107 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
107 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
108 matching only after having filtered blank lines. If refine is True,
108 matching only after having filtered blank lines. If refine is True,
109 then '~' blocks are refined and are only made of blank lines.
109 then '~' blocks are refined and are only made of blank lines.
110 lines1 and lines2 are text1 and text2 split with splitnewlines() if
110 lines1 and lines2 are text1 and text2 split with splitnewlines() if
111 they are already available.
111 they are already available.
112 """
112 """
113 if opts is None:
113 if opts is None:
114 opts = defaultopts
114 opts = defaultopts
115 if lines1 is None:
116 lines1 = splitnewlines(text1)
117 if lines2 is None:
118 lines2 = splitnewlines(text2)
119 if opts.ignorews or opts.ignorewsamount:
115 if opts.ignorews or opts.ignorewsamount:
120 text1 = wsclean(opts, text1, False)
116 text1 = wsclean(opts, text1, False)
121 text2 = wsclean(opts, text2, False)
117 text2 = wsclean(opts, text2, False)
122 diff = bdiff.blocks(text1, text2)
118 diff = bdiff.blocks(text1, text2)
123 for i, s1 in enumerate(diff):
119 for i, s1 in enumerate(diff):
124 # The first match is special.
120 # The first match is special.
125 # we've either found a match starting at line 0 or a match later
121 # we've either found a match starting at line 0 or a match later
126 # in the file. If it starts later, old and new below will both be
122 # in the file. If it starts later, old and new below will both be
127 # empty and we'll continue to the next match.
123 # empty and we'll continue to the next match.
128 if i > 0:
124 if i > 0:
129 s = diff[i - 1]
125 s = diff[i - 1]
130 else:
126 else:
131 s = [0, 0, 0, 0]
127 s = [0, 0, 0, 0]
132 s = [s[1], s1[0], s[3], s1[2]]
128 s = [s[1], s1[0], s[3], s1[2]]
133 old = lines1[s[0]:s[1]]
134 new = lines2[s[2]:s[3]]
135
129
136 # bdiff sometimes gives huge matches past eof, this check eats them,
130 # bdiff sometimes gives huge matches past eof, this check eats them,
137 # and deals with the special first match case described above
131 # and deals with the special first match case described above
138 if old or new:
132 if s[0] != s[1] or s[2] != s[3]:
139 type = '!'
133 type = '!'
140 if opts.ignoreblanklines:
134 if opts.ignoreblanklines:
141 cold = wsclean(opts, "".join(old))
135 if lines1 is None:
142 cnew = wsclean(opts, "".join(new))
136 lines1 = splitnewlines(text1)
143 if cold == cnew:
137 if lines2 is None:
138 lines2 = splitnewlines(text2)
139 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
140 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
141 if old == new:
144 type = '~'
142 type = '~'
145 yield s, type
143 yield s, type
146 yield s1, '='
144 yield s1, '='
147
145
148 def diffline(revs, a, b, opts):
146 def diffline(revs, a, b, opts):
149 parts = ['diff']
147 parts = ['diff']
150 if opts.git:
148 if opts.git:
151 parts.append('--git')
149 parts.append('--git')
152 if revs and not opts.git:
150 if revs and not opts.git:
153 parts.append(' '.join(["-r %s" % rev for rev in revs]))
151 parts.append(' '.join(["-r %s" % rev for rev in revs]))
154 if opts.git:
152 if opts.git:
155 parts.append('a/%s' % a)
153 parts.append('a/%s' % a)
156 parts.append('b/%s' % b)
154 parts.append('b/%s' % b)
157 else:
155 else:
158 parts.append(a)
156 parts.append(a)
159 return ' '.join(parts) + '\n'
157 return ' '.join(parts) + '\n'
160
158
161 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
159 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
162 def datetag(date, addtab=True):
160 def datetag(date, addtab=True):
163 if not opts.git and not opts.nodates:
161 if not opts.git and not opts.nodates:
164 return '\t%s\n' % date
162 return '\t%s\n' % date
165 if addtab and ' ' in fn1:
163 if addtab and ' ' in fn1:
166 return '\t\n'
164 return '\t\n'
167 return '\n'
165 return '\n'
168
166
169 if not a and not b:
167 if not a and not b:
170 return ""
168 return ""
171 epoch = util.datestr((0, 0))
169 epoch = util.datestr((0, 0))
172
170
173 fn1 = util.pconvert(fn1)
171 fn1 = util.pconvert(fn1)
174 fn2 = util.pconvert(fn2)
172 fn2 = util.pconvert(fn2)
175
173
176 if not opts.text and (util.binary(a) or util.binary(b)):
174 if not opts.text and (util.binary(a) or util.binary(b)):
177 if a and b and len(a) == len(b) and a == b:
175 if a and b and len(a) == len(b) and a == b:
178 return ""
176 return ""
179 l = ['Binary file %s has changed\n' % fn1]
177 l = ['Binary file %s has changed\n' % fn1]
180 elif not a:
178 elif not a:
181 b = splitnewlines(b)
179 b = splitnewlines(b)
182 if a is None:
180 if a is None:
183 l1 = '--- /dev/null%s' % datetag(epoch, False)
181 l1 = '--- /dev/null%s' % datetag(epoch, False)
184 else:
182 else:
185 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
183 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
186 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
184 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
187 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
185 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
188 l = [l1, l2, l3] + ["+" + e for e in b]
186 l = [l1, l2, l3] + ["+" + e for e in b]
189 elif not b:
187 elif not b:
190 a = splitnewlines(a)
188 a = splitnewlines(a)
191 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
189 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
192 if b is None:
190 if b is None:
193 l2 = '+++ /dev/null%s' % datetag(epoch, False)
191 l2 = '+++ /dev/null%s' % datetag(epoch, False)
194 else:
192 else:
195 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
193 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
196 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
194 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
197 l = [l1, l2, l3] + ["-" + e for e in a]
195 l = [l1, l2, l3] + ["-" + e for e in a]
198 else:
196 else:
199 al = splitnewlines(a)
197 al = splitnewlines(a)
200 bl = splitnewlines(b)
198 bl = splitnewlines(b)
201 l = list(_unidiff(a, b, al, bl, opts=opts))
199 l = list(_unidiff(a, b, al, bl, opts=opts))
202 if not l:
200 if not l:
203 return ""
201 return ""
204
202
205 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
203 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
206 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
204 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
207
205
208 for ln in xrange(len(l)):
206 for ln in xrange(len(l)):
209 if l[ln][-1] != '\n':
207 if l[ln][-1] != '\n':
210 l[ln] += "\n\ No newline at end of file\n"
208 l[ln] += "\n\ No newline at end of file\n"
211
209
212 if r:
210 if r:
213 l.insert(0, diffline(r, fn1, fn2, opts))
211 l.insert(0, diffline(r, fn1, fn2, opts))
214
212
215 return "".join(l)
213 return "".join(l)
216
214
217 # creates a headerless unified diff
215 # creates a headerless unified diff
218 # t1 and t2 are the text to be diffed
216 # t1 and t2 are the text to be diffed
219 # l1 and l2 are the text broken up into lines
217 # l1 and l2 are the text broken up into lines
220 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
218 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
221 def contextend(l, len):
219 def contextend(l, len):
222 ret = l + opts.context
220 ret = l + opts.context
223 if ret > len:
221 if ret > len:
224 ret = len
222 ret = len
225 return ret
223 return ret
226
224
227 def contextstart(l):
225 def contextstart(l):
228 ret = l - opts.context
226 ret = l - opts.context
229 if ret < 0:
227 if ret < 0:
230 return 0
228 return 0
231 return ret
229 return ret
232
230
233 lastfunc = [0, '']
231 lastfunc = [0, '']
234 def yieldhunk(hunk):
232 def yieldhunk(hunk):
235 (astart, a2, bstart, b2, delta) = hunk
233 (astart, a2, bstart, b2, delta) = hunk
236 aend = contextend(a2, len(l1))
234 aend = contextend(a2, len(l1))
237 alen = aend - astart
235 alen = aend - astart
238 blen = b2 - bstart + aend - a2
236 blen = b2 - bstart + aend - a2
239
237
240 func = ""
238 func = ""
241 if opts.showfunc:
239 if opts.showfunc:
242 lastpos, func = lastfunc
240 lastpos, func = lastfunc
243 # walk backwards from the start of the context up to the start of
241 # walk backwards from the start of the context up to the start of
244 # the previous hunk context until we find a line starting with an
242 # the previous hunk context until we find a line starting with an
245 # alphanumeric char.
243 # alphanumeric char.
246 for i in xrange(astart - 1, lastpos - 1, -1):
244 for i in xrange(astart - 1, lastpos - 1, -1):
247 if l1[i][0].isalnum():
245 if l1[i][0].isalnum():
248 func = ' ' + l1[i].rstrip()[:40]
246 func = ' ' + l1[i].rstrip()[:40]
249 lastfunc[1] = func
247 lastfunc[1] = func
250 break
248 break
251 # by recording this hunk's starting point as the next place to
249 # by recording this hunk's starting point as the next place to
252 # start looking for function lines, we avoid reading any line in
250 # start looking for function lines, we avoid reading any line in
253 # the file more than once.
251 # the file more than once.
254 lastfunc[0] = astart
252 lastfunc[0] = astart
255
253
256 # zero-length hunk ranges report their start line as one less
254 # zero-length hunk ranges report their start line as one less
257 if alen:
255 if alen:
258 astart += 1
256 astart += 1
259 if blen:
257 if blen:
260 bstart += 1
258 bstart += 1
261
259
262 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
260 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
263 bstart, blen, func)
261 bstart, blen, func)
264 for x in delta:
262 for x in delta:
265 yield x
263 yield x
266 for x in xrange(a2, aend):
264 for x in xrange(a2, aend):
267 yield ' ' + l1[x]
265 yield ' ' + l1[x]
268
266
269 # bdiff.blocks gives us the matching sequences in the files. The loop
267 # bdiff.blocks gives us the matching sequences in the files. The loop
270 # below finds the spaces between those matching sequences and translates
268 # below finds the spaces between those matching sequences and translates
271 # them into diff output.
269 # them into diff output.
272 #
270 #
273 hunk = None
271 hunk = None
274 for s, stype in allblocks(t1, t2, opts, l1, l2):
272 for s, stype in allblocks(t1, t2, opts, l1, l2):
275 if stype != '!':
273 if stype != '!':
276 continue
274 continue
277 delta = []
275 delta = []
278 a1, a2, b1, b2 = s
276 a1, a2, b1, b2 = s
279 old = l1[a1:a2]
277 old = l1[a1:a2]
280 new = l2[b1:b2]
278 new = l2[b1:b2]
281
279
282 astart = contextstart(a1)
280 astart = contextstart(a1)
283 bstart = contextstart(b1)
281 bstart = contextstart(b1)
284 prev = None
282 prev = None
285 if hunk:
283 if hunk:
286 # join with the previous hunk if it falls inside the context
284 # join with the previous hunk if it falls inside the context
287 if astart < hunk[1] + opts.context + 1:
285 if astart < hunk[1] + opts.context + 1:
288 prev = hunk
286 prev = hunk
289 astart = hunk[1]
287 astart = hunk[1]
290 bstart = hunk[3]
288 bstart = hunk[3]
291 else:
289 else:
292 for x in yieldhunk(hunk):
290 for x in yieldhunk(hunk):
293 yield x
291 yield x
294 if prev:
292 if prev:
295 # we've joined the previous hunk, record the new ending points.
293 # we've joined the previous hunk, record the new ending points.
296 hunk[1] = a2
294 hunk[1] = a2
297 hunk[3] = b2
295 hunk[3] = b2
298 delta = hunk[4]
296 delta = hunk[4]
299 else:
297 else:
300 # create a new hunk
298 # create a new hunk
301 hunk = [astart, a2, bstart, b2, delta]
299 hunk = [astart, a2, bstart, b2, delta]
302
300
303 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
301 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
304 delta[len(delta):] = ['-' + x for x in old]
302 delta[len(delta):] = ['-' + x for x in old]
305 delta[len(delta):] = ['+' + x for x in new]
303 delta[len(delta):] = ['+' + x for x in new]
306
304
307 if hunk:
305 if hunk:
308 for x in yieldhunk(hunk):
306 for x in yieldhunk(hunk):
309 yield x
307 yield x
310
308
311 def patchtext(bin):
309 def patchtext(bin):
312 pos = 0
310 pos = 0
313 t = []
311 t = []
314 while pos < len(bin):
312 while pos < len(bin):
315 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
313 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
316 pos += 12
314 pos += 12
317 t.append(bin[pos:pos + l])
315 t.append(bin[pos:pos + l])
318 pos += l
316 pos += l
319 return "".join(t)
317 return "".join(t)
320
318
321 def patch(a, bin):
319 def patch(a, bin):
322 if len(a) == 0:
320 if len(a) == 0:
323 # skip over trivial delta header
321 # skip over trivial delta header
324 return buffer(bin, 12)
322 return buffer(bin, 12)
325 return mpatch.patches(a, [bin])
323 return mpatch.patches(a, [bin])
326
324
327 # similar to difflib.SequenceMatcher.get_matching_blocks
325 # similar to difflib.SequenceMatcher.get_matching_blocks
328 def get_matching_blocks(a, b):
326 def get_matching_blocks(a, b):
329 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
327 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
330
328
331 def trivialdiffheader(length):
329 def trivialdiffheader(length):
332 return struct.pack(">lll", 0, 0, length)
330 return struct.pack(">lll", 0, 0, length)
333
331
334 patches = mpatch.patches
332 patches = mpatch.patches
335 patchedsize = mpatch.patchedsize
333 patchedsize = mpatch.patchedsize
336 textdiff = bdiff.bdiff
334 textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now