##// END OF EJS Templates
mdiff: make diffblocks() return all blocks, matching and changed...
Patrick Mezard -
r15526:e6519c62 default
parent child Browse files
Show More
@@ -1,304 +1,309 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, util
9 import bdiff, mpatch, util
10 import re, struct
10 import re, struct
11
11
12 def splitnewlines(text):
12 def splitnewlines(text):
13 '''like str.splitlines, but only split on newlines.'''
13 '''like str.splitlines, but only split on newlines.'''
14 lines = [l + '\n' for l in text.split('\n')]
14 lines = [l + '\n' for l in text.split('\n')]
15 if lines:
15 if lines:
16 if lines[-1] == '\n':
16 if lines[-1] == '\n':
17 lines.pop()
17 lines.pop()
18 else:
18 else:
19 lines[-1] = lines[-1][:-1]
19 lines[-1] = lines[-1][:-1]
20 return lines
20 return lines
21
21
22 class diffopts(object):
22 class diffopts(object):
23 '''context is the number of context lines
23 '''context is the number of context lines
24 text treats all files as text
24 text treats all files as text
25 showfunc enables diff -p output
25 showfunc enables diff -p output
26 git enables the git extended patch format
26 git enables the git extended patch format
27 nodates removes dates from diff headers
27 nodates removes dates from diff headers
28 ignorews ignores all whitespace changes in the diff
28 ignorews ignores all whitespace changes in the diff
29 ignorewsamount ignores changes in the amount of whitespace
29 ignorewsamount ignores changes in the amount of whitespace
30 ignoreblanklines ignores changes whose lines are all blank
30 ignoreblanklines ignores changes whose lines are all blank
31 upgrade generates git diffs to avoid data loss
31 upgrade generates git diffs to avoid data loss
32 '''
32 '''
33
33
34 defaults = {
34 defaults = {
35 'context': 3,
35 'context': 3,
36 'text': False,
36 'text': False,
37 'showfunc': False,
37 'showfunc': False,
38 'git': False,
38 'git': False,
39 'nodates': False,
39 'nodates': False,
40 'ignorews': False,
40 'ignorews': False,
41 'ignorewsamount': False,
41 'ignorewsamount': False,
42 'ignoreblanklines': False,
42 'ignoreblanklines': False,
43 'upgrade': False,
43 'upgrade': False,
44 }
44 }
45
45
46 __slots__ = defaults.keys()
46 __slots__ = defaults.keys()
47
47
48 def __init__(self, **opts):
48 def __init__(self, **opts):
49 for k in self.__slots__:
49 for k in self.__slots__:
50 v = opts.get(k)
50 v = opts.get(k)
51 if v is None:
51 if v is None:
52 v = self.defaults[k]
52 v = self.defaults[k]
53 setattr(self, k, v)
53 setattr(self, k, v)
54
54
55 try:
55 try:
56 self.context = int(self.context)
56 self.context = int(self.context)
57 except ValueError:
57 except ValueError:
58 raise util.Abort(_('diff context lines count must be '
58 raise util.Abort(_('diff context lines count must be '
59 'an integer, not %r') % self.context)
59 'an integer, not %r') % self.context)
60
60
61 def copy(self, **kwargs):
61 def copy(self, **kwargs):
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
63 opts.update(kwargs)
63 opts.update(kwargs)
64 return diffopts(**opts)
64 return diffopts(**opts)
65
65
66 defaultopts = diffopts()
66 defaultopts = diffopts()
67
67
68 def wsclean(opts, text, blank=True):
68 def wsclean(opts, text, blank=True):
69 if opts.ignorews:
69 if opts.ignorews:
70 text = re.sub('[ \t\r]+', '', text)
70 text = re.sub('[ \t\r]+', '', text)
71 elif opts.ignorewsamount:
71 elif opts.ignorewsamount:
72 text = re.sub('[ \t\r]+', ' ', text)
72 text = re.sub('[ \t\r]+', ' ', text)
73 text = text.replace(' \n', '\n')
73 text = text.replace(' \n', '\n')
74 if blank and opts.ignoreblanklines:
74 if blank and opts.ignoreblanklines:
75 text = re.sub('\n+', '\n', text).strip('\n')
75 text = re.sub('\n+', '\n', text).strip('\n')
76 return text
76 return text
77
77
78 def diffblocks(text1, text2, opts=None, lines1=None, lines2=None):
78 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
79 """Return changed blocks between text1 and text2, the blocks in-between
79 """Return (block, type) tuples, where block is an mdiff.blocks
80 those emitted by bdiff.blocks. Take into account the whitespace normalization
80 line entry. type is '=' for blocks matching exactly one another
81 rules defined by opts.
81 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
82 lines1 and lines2 are text1 and text2 split with splitnewlines() if they are
82 matching only after having filtered blank lines.
83 already available.
83 lines1 and lines2 are text1 and text2 split with splitnewlines() if
84 they are already available.
84 """
85 """
85 if opts is None:
86 if opts is None:
86 opts = defaultopts
87 opts = defaultopts
87 if lines1 is None:
88 if lines1 is None:
88 lines1 = splitnewlines(text1)
89 lines1 = splitnewlines(text1)
89 if lines2 is None:
90 if lines2 is None:
90 lines2 = splitnewlines(text2)
91 lines2 = splitnewlines(text2)
91 if opts.ignorews or opts.ignorewsamount:
92 if opts.ignorews or opts.ignorewsamount:
92 text1 = wsclean(opts, text1, False)
93 text1 = wsclean(opts, text1, False)
93 text2 = wsclean(opts, text2, False)
94 text2 = wsclean(opts, text2, False)
94 diff = bdiff.blocks(text1, text2)
95 diff = bdiff.blocks(text1, text2)
95 for i, s1 in enumerate(diff):
96 for i, s1 in enumerate(diff):
96 # The first match is special.
97 # The first match is special.
97 # we've either found a match starting at line 0 or a match later
98 # we've either found a match starting at line 0 or a match later
98 # in the file. If it starts later, old and new below will both be
99 # in the file. If it starts later, old and new below will both be
99 # empty and we'll continue to the next match.
100 # empty and we'll continue to the next match.
100 if i > 0:
101 if i > 0:
101 s = diff[i - 1]
102 s = diff[i - 1]
102 else:
103 else:
103 s = [0, 0, 0, 0]
104 s = [0, 0, 0, 0]
104 s = [s[1], s1[0], s[3], s1[2]]
105 s = [s[1], s1[0], s[3], s1[2]]
105 old = lines1[s[0]:s[1]]
106 old = lines1[s[0]:s[1]]
106 new = lines2[s[2]:s[3]]
107 new = lines2[s[2]:s[3]]
107
108
108 # bdiff sometimes gives huge matches past eof, this check eats them,
109 # bdiff sometimes gives huge matches past eof, this check eats them,
109 # and deals with the special first match case described above
110 # and deals with the special first match case described above
110 if not old and not new:
111 if old or new:
111 continue
112 type = '!'
112
113 if opts.ignoreblanklines:
113 if opts.ignoreblanklines:
114 cold = wsclean(opts, "".join(old))
114 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
115 cnew = wsclean(opts, "".join(new))
115 continue
116 if cold == cnew:
116 yield s
117 type = '~'
118 yield s, type
119 yield s1, '='
117
120
118 def diffline(revs, a, b, opts):
121 def diffline(revs, a, b, opts):
119 parts = ['diff']
122 parts = ['diff']
120 if opts.git:
123 if opts.git:
121 parts.append('--git')
124 parts.append('--git')
122 if revs and not opts.git:
125 if revs and not opts.git:
123 parts.append(' '.join(["-r %s" % rev for rev in revs]))
126 parts.append(' '.join(["-r %s" % rev for rev in revs]))
124 if opts.git:
127 if opts.git:
125 parts.append('a/%s' % a)
128 parts.append('a/%s' % a)
126 parts.append('b/%s' % b)
129 parts.append('b/%s' % b)
127 else:
130 else:
128 parts.append(a)
131 parts.append(a)
129 return ' '.join(parts) + '\n'
132 return ' '.join(parts) + '\n'
130
133
131 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
134 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
132 def datetag(date, addtab=True):
135 def datetag(date, addtab=True):
133 if not opts.git and not opts.nodates:
136 if not opts.git and not opts.nodates:
134 return '\t%s\n' % date
137 return '\t%s\n' % date
135 if addtab and ' ' in fn1:
138 if addtab and ' ' in fn1:
136 return '\t\n'
139 return '\t\n'
137 return '\n'
140 return '\n'
138
141
139 if not a and not b:
142 if not a and not b:
140 return ""
143 return ""
141 epoch = util.datestr((0, 0))
144 epoch = util.datestr((0, 0))
142
145
143 fn1 = util.pconvert(fn1)
146 fn1 = util.pconvert(fn1)
144 fn2 = util.pconvert(fn2)
147 fn2 = util.pconvert(fn2)
145
148
146 if not opts.text and (util.binary(a) or util.binary(b)):
149 if not opts.text and (util.binary(a) or util.binary(b)):
147 if a and b and len(a) == len(b) and a == b:
150 if a and b and len(a) == len(b) and a == b:
148 return ""
151 return ""
149 l = ['Binary file %s has changed\n' % fn1]
152 l = ['Binary file %s has changed\n' % fn1]
150 elif not a:
153 elif not a:
151 b = splitnewlines(b)
154 b = splitnewlines(b)
152 if a is None:
155 if a is None:
153 l1 = '--- /dev/null%s' % datetag(epoch, False)
156 l1 = '--- /dev/null%s' % datetag(epoch, False)
154 else:
157 else:
155 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
158 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
156 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
159 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
157 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
160 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
158 l = [l1, l2, l3] + ["+" + e for e in b]
161 l = [l1, l2, l3] + ["+" + e for e in b]
159 elif not b:
162 elif not b:
160 a = splitnewlines(a)
163 a = splitnewlines(a)
161 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
164 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
162 if b is None:
165 if b is None:
163 l2 = '+++ /dev/null%s' % datetag(epoch, False)
166 l2 = '+++ /dev/null%s' % datetag(epoch, False)
164 else:
167 else:
165 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
168 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
166 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
169 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
167 l = [l1, l2, l3] + ["-" + e for e in a]
170 l = [l1, l2, l3] + ["-" + e for e in a]
168 else:
171 else:
169 al = splitnewlines(a)
172 al = splitnewlines(a)
170 bl = splitnewlines(b)
173 bl = splitnewlines(b)
171 l = list(_unidiff(a, b, al, bl, opts=opts))
174 l = list(_unidiff(a, b, al, bl, opts=opts))
172 if not l:
175 if not l:
173 return ""
176 return ""
174
177
175 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
178 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
176 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
179 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
177
180
178 for ln in xrange(len(l)):
181 for ln in xrange(len(l)):
179 if l[ln][-1] != '\n':
182 if l[ln][-1] != '\n':
180 l[ln] += "\n\ No newline at end of file\n"
183 l[ln] += "\n\ No newline at end of file\n"
181
184
182 if r:
185 if r:
183 l.insert(0, diffline(r, fn1, fn2, opts))
186 l.insert(0, diffline(r, fn1, fn2, opts))
184
187
185 return "".join(l)
188 return "".join(l)
186
189
187 # creates a headerless unified diff
190 # creates a headerless unified diff
188 # t1 and t2 are the text to be diffed
191 # t1 and t2 are the text to be diffed
189 # l1 and l2 are the text broken up into lines
192 # l1 and l2 are the text broken up into lines
190 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
193 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
191 def contextend(l, len):
194 def contextend(l, len):
192 ret = l + opts.context
195 ret = l + opts.context
193 if ret > len:
196 if ret > len:
194 ret = len
197 ret = len
195 return ret
198 return ret
196
199
197 def contextstart(l):
200 def contextstart(l):
198 ret = l - opts.context
201 ret = l - opts.context
199 if ret < 0:
202 if ret < 0:
200 return 0
203 return 0
201 return ret
204 return ret
202
205
203 lastfunc = [0, '']
206 lastfunc = [0, '']
204 def yieldhunk(hunk):
207 def yieldhunk(hunk):
205 (astart, a2, bstart, b2, delta) = hunk
208 (astart, a2, bstart, b2, delta) = hunk
206 aend = contextend(a2, len(l1))
209 aend = contextend(a2, len(l1))
207 alen = aend - astart
210 alen = aend - astart
208 blen = b2 - bstart + aend - a2
211 blen = b2 - bstart + aend - a2
209
212
210 func = ""
213 func = ""
211 if opts.showfunc:
214 if opts.showfunc:
212 lastpos, func = lastfunc
215 lastpos, func = lastfunc
213 # walk backwards from the start of the context up to the start of
216 # walk backwards from the start of the context up to the start of
214 # the previous hunk context until we find a line starting with an
217 # the previous hunk context until we find a line starting with an
215 # alphanumeric char.
218 # alphanumeric char.
216 for i in xrange(astart - 1, lastpos - 1, -1):
219 for i in xrange(astart - 1, lastpos - 1, -1):
217 if l1[i][0].isalnum():
220 if l1[i][0].isalnum():
218 func = ' ' + l1[i].rstrip()[:40]
221 func = ' ' + l1[i].rstrip()[:40]
219 lastfunc[1] = func
222 lastfunc[1] = func
220 break
223 break
221 # by recording this hunk's starting point as the next place to
224 # by recording this hunk's starting point as the next place to
222 # start looking for function lines, we avoid reading any line in
225 # start looking for function lines, we avoid reading any line in
223 # the file more than once.
226 # the file more than once.
224 lastfunc[0] = astart
227 lastfunc[0] = astart
225
228
226 # zero-length hunk ranges report their start line as one less
229 # zero-length hunk ranges report their start line as one less
227 if alen:
230 if alen:
228 astart += 1
231 astart += 1
229 if blen:
232 if blen:
230 bstart += 1
233 bstart += 1
231
234
232 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
235 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
233 bstart, blen, func)
236 bstart, blen, func)
234 for x in delta:
237 for x in delta:
235 yield x
238 yield x
236 for x in xrange(a2, aend):
239 for x in xrange(a2, aend):
237 yield ' ' + l1[x]
240 yield ' ' + l1[x]
238
241
239 # bdiff.blocks gives us the matching sequences in the files. The loop
242 # bdiff.blocks gives us the matching sequences in the files. The loop
240 # below finds the spaces between those matching sequences and translates
243 # below finds the spaces between those matching sequences and translates
241 # them into diff output.
244 # them into diff output.
242 #
245 #
243 hunk = None
246 hunk = None
244 for s in diffblocks(t1, t2, opts, l1, l2):
247 for s, stype in allblocks(t1, t2, opts, l1, l2):
248 if stype != '!':
249 continue
245 delta = []
250 delta = []
246 a1, a2, b1, b2 = s
251 a1, a2, b1, b2 = s
247 old = l1[a1:a2]
252 old = l1[a1:a2]
248 new = l2[b1:b2]
253 new = l2[b1:b2]
249
254
250 astart = contextstart(a1)
255 astart = contextstart(a1)
251 bstart = contextstart(b1)
256 bstart = contextstart(b1)
252 prev = None
257 prev = None
253 if hunk:
258 if hunk:
254 # join with the previous hunk if it falls inside the context
259 # join with the previous hunk if it falls inside the context
255 if astart < hunk[1] + opts.context + 1:
260 if astart < hunk[1] + opts.context + 1:
256 prev = hunk
261 prev = hunk
257 astart = hunk[1]
262 astart = hunk[1]
258 bstart = hunk[3]
263 bstart = hunk[3]
259 else:
264 else:
260 for x in yieldhunk(hunk):
265 for x in yieldhunk(hunk):
261 yield x
266 yield x
262 if prev:
267 if prev:
263 # we've joined the previous hunk, record the new ending points.
268 # we've joined the previous hunk, record the new ending points.
264 hunk[1] = a2
269 hunk[1] = a2
265 hunk[3] = b2
270 hunk[3] = b2
266 delta = hunk[4]
271 delta = hunk[4]
267 else:
272 else:
268 # create a new hunk
273 # create a new hunk
269 hunk = [astart, a2, bstart, b2, delta]
274 hunk = [astart, a2, bstart, b2, delta]
270
275
271 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
276 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
272 delta[len(delta):] = ['-' + x for x in old]
277 delta[len(delta):] = ['-' + x for x in old]
273 delta[len(delta):] = ['+' + x for x in new]
278 delta[len(delta):] = ['+' + x for x in new]
274
279
275 if hunk:
280 if hunk:
276 for x in yieldhunk(hunk):
281 for x in yieldhunk(hunk):
277 yield x
282 yield x
278
283
279 def patchtext(bin):
284 def patchtext(bin):
280 pos = 0
285 pos = 0
281 t = []
286 t = []
282 while pos < len(bin):
287 while pos < len(bin):
283 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
288 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
284 pos += 12
289 pos += 12
285 t.append(bin[pos:pos + l])
290 t.append(bin[pos:pos + l])
286 pos += l
291 pos += l
287 return "".join(t)
292 return "".join(t)
288
293
289 def patch(a, bin):
294 def patch(a, bin):
290 if len(a) == 0:
295 if len(a) == 0:
291 # skip over trivial delta header
296 # skip over trivial delta header
292 return buffer(bin, 12)
297 return buffer(bin, 12)
293 return mpatch.patches(a, [bin])
298 return mpatch.patches(a, [bin])
294
299
295 # similar to difflib.SequenceMatcher.get_matching_blocks
300 # similar to difflib.SequenceMatcher.get_matching_blocks
296 def get_matching_blocks(a, b):
301 def get_matching_blocks(a, b):
297 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
302 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
298
303
299 def trivialdiffheader(length):
304 def trivialdiffheader(length):
300 return struct.pack(">lll", 0, 0, length)
305 return struct.pack(">lll", 0, 0, length)
301
306
302 patches = mpatch.patches
307 patches = mpatch.patches
303 patchedsize = mpatch.patchedsize
308 patchedsize = mpatch.patchedsize
304 textdiff = bdiff.bdiff
309 textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now