##// END OF EJS Templates
mdiff: extract blocks whitespace normalization in diffblocks()...
Patrick Mezard -
r15525:935bf2e7 default
parent child Browse files
Show More
@@ -1,290 +1,304
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, util
9 import bdiff, mpatch, util
10 import re, struct
10 import re, struct
11
11
12 def splitnewlines(text):
12 def splitnewlines(text):
13 '''like str.splitlines, but only split on newlines.'''
13 '''like str.splitlines, but only split on newlines.'''
14 lines = [l + '\n' for l in text.split('\n')]
14 lines = [l + '\n' for l in text.split('\n')]
15 if lines:
15 if lines:
16 if lines[-1] == '\n':
16 if lines[-1] == '\n':
17 lines.pop()
17 lines.pop()
18 else:
18 else:
19 lines[-1] = lines[-1][:-1]
19 lines[-1] = lines[-1][:-1]
20 return lines
20 return lines
21
21
22 class diffopts(object):
22 class diffopts(object):
23 '''context is the number of context lines
23 '''context is the number of context lines
24 text treats all files as text
24 text treats all files as text
25 showfunc enables diff -p output
25 showfunc enables diff -p output
26 git enables the git extended patch format
26 git enables the git extended patch format
27 nodates removes dates from diff headers
27 nodates removes dates from diff headers
28 ignorews ignores all whitespace changes in the diff
28 ignorews ignores all whitespace changes in the diff
29 ignorewsamount ignores changes in the amount of whitespace
29 ignorewsamount ignores changes in the amount of whitespace
30 ignoreblanklines ignores changes whose lines are all blank
30 ignoreblanklines ignores changes whose lines are all blank
31 upgrade generates git diffs to avoid data loss
31 upgrade generates git diffs to avoid data loss
32 '''
32 '''
33
33
34 defaults = {
34 defaults = {
35 'context': 3,
35 'context': 3,
36 'text': False,
36 'text': False,
37 'showfunc': False,
37 'showfunc': False,
38 'git': False,
38 'git': False,
39 'nodates': False,
39 'nodates': False,
40 'ignorews': False,
40 'ignorews': False,
41 'ignorewsamount': False,
41 'ignorewsamount': False,
42 'ignoreblanklines': False,
42 'ignoreblanklines': False,
43 'upgrade': False,
43 'upgrade': False,
44 }
44 }
45
45
46 __slots__ = defaults.keys()
46 __slots__ = defaults.keys()
47
47
48 def __init__(self, **opts):
48 def __init__(self, **opts):
49 for k in self.__slots__:
49 for k in self.__slots__:
50 v = opts.get(k)
50 v = opts.get(k)
51 if v is None:
51 if v is None:
52 v = self.defaults[k]
52 v = self.defaults[k]
53 setattr(self, k, v)
53 setattr(self, k, v)
54
54
55 try:
55 try:
56 self.context = int(self.context)
56 self.context = int(self.context)
57 except ValueError:
57 except ValueError:
58 raise util.Abort(_('diff context lines count must be '
58 raise util.Abort(_('diff context lines count must be '
59 'an integer, not %r') % self.context)
59 'an integer, not %r') % self.context)
60
60
61 def copy(self, **kwargs):
61 def copy(self, **kwargs):
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
63 opts.update(kwargs)
63 opts.update(kwargs)
64 return diffopts(**opts)
64 return diffopts(**opts)
65
65
66 defaultopts = diffopts()
66 defaultopts = diffopts()
67
67
68 def wsclean(opts, text, blank=True):
68 def wsclean(opts, text, blank=True):
69 if opts.ignorews:
69 if opts.ignorews:
70 text = re.sub('[ \t\r]+', '', text)
70 text = re.sub('[ \t\r]+', '', text)
71 elif opts.ignorewsamount:
71 elif opts.ignorewsamount:
72 text = re.sub('[ \t\r]+', ' ', text)
72 text = re.sub('[ \t\r]+', ' ', text)
73 text = text.replace(' \n', '\n')
73 text = text.replace(' \n', '\n')
74 if blank and opts.ignoreblanklines:
74 if blank and opts.ignoreblanklines:
75 text = re.sub('\n+', '\n', text).strip('\n')
75 text = re.sub('\n+', '\n', text).strip('\n')
76 return text
76 return text
77
77
78 def diffblocks(text1, text2, opts=None, lines1=None, lines2=None):
79 """Return changed blocks between text1 and text2, the blocks in-between
80 those emitted by bdiff.blocks. Take into account the whitespace normalization
81 rules defined by opts.
82 lines1 and lines2 are text1 and text2 split with splitnewlines() if they are
83 already available.
84 """
85 if opts is None:
86 opts = defaultopts
87 if lines1 is None:
88 lines1 = splitnewlines(text1)
89 if lines2 is None:
90 lines2 = splitnewlines(text2)
91 if opts.ignorews or opts.ignorewsamount:
92 text1 = wsclean(opts, text1, False)
93 text2 = wsclean(opts, text2, False)
94 diff = bdiff.blocks(text1, text2)
95 for i, s1 in enumerate(diff):
96 # The first match is special.
97 # we've either found a match starting at line 0 or a match later
98 # in the file. If it starts later, old and new below will both be
99 # empty and we'll continue to the next match.
100 if i > 0:
101 s = diff[i - 1]
102 else:
103 s = [0, 0, 0, 0]
104 s = [s[1], s1[0], s[3], s1[2]]
105 old = lines1[s[0]:s[1]]
106 new = lines2[s[2]:s[3]]
107
108 # bdiff sometimes gives huge matches past eof, this check eats them,
109 # and deals with the special first match case described above
110 if not old and not new:
111 continue
112
113 if opts.ignoreblanklines:
114 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
115 continue
116 yield s
117
78 def diffline(revs, a, b, opts):
118 def diffline(revs, a, b, opts):
79 parts = ['diff']
119 parts = ['diff']
80 if opts.git:
120 if opts.git:
81 parts.append('--git')
121 parts.append('--git')
82 if revs and not opts.git:
122 if revs and not opts.git:
83 parts.append(' '.join(["-r %s" % rev for rev in revs]))
123 parts.append(' '.join(["-r %s" % rev for rev in revs]))
84 if opts.git:
124 if opts.git:
85 parts.append('a/%s' % a)
125 parts.append('a/%s' % a)
86 parts.append('b/%s' % b)
126 parts.append('b/%s' % b)
87 else:
127 else:
88 parts.append(a)
128 parts.append(a)
89 return ' '.join(parts) + '\n'
129 return ' '.join(parts) + '\n'
90
130
91 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
131 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
92 def datetag(date, addtab=True):
132 def datetag(date, addtab=True):
93 if not opts.git and not opts.nodates:
133 if not opts.git and not opts.nodates:
94 return '\t%s\n' % date
134 return '\t%s\n' % date
95 if addtab and ' ' in fn1:
135 if addtab and ' ' in fn1:
96 return '\t\n'
136 return '\t\n'
97 return '\n'
137 return '\n'
98
138
99 if not a and not b:
139 if not a and not b:
100 return ""
140 return ""
101 epoch = util.datestr((0, 0))
141 epoch = util.datestr((0, 0))
102
142
103 fn1 = util.pconvert(fn1)
143 fn1 = util.pconvert(fn1)
104 fn2 = util.pconvert(fn2)
144 fn2 = util.pconvert(fn2)
105
145
106 if not opts.text and (util.binary(a) or util.binary(b)):
146 if not opts.text and (util.binary(a) or util.binary(b)):
107 if a and b and len(a) == len(b) and a == b:
147 if a and b and len(a) == len(b) and a == b:
108 return ""
148 return ""
109 l = ['Binary file %s has changed\n' % fn1]
149 l = ['Binary file %s has changed\n' % fn1]
110 elif not a:
150 elif not a:
111 b = splitnewlines(b)
151 b = splitnewlines(b)
112 if a is None:
152 if a is None:
113 l1 = '--- /dev/null%s' % datetag(epoch, False)
153 l1 = '--- /dev/null%s' % datetag(epoch, False)
114 else:
154 else:
115 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
155 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
116 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
156 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
117 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
157 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
118 l = [l1, l2, l3] + ["+" + e for e in b]
158 l = [l1, l2, l3] + ["+" + e for e in b]
119 elif not b:
159 elif not b:
120 a = splitnewlines(a)
160 a = splitnewlines(a)
121 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
161 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
122 if b is None:
162 if b is None:
123 l2 = '+++ /dev/null%s' % datetag(epoch, False)
163 l2 = '+++ /dev/null%s' % datetag(epoch, False)
124 else:
164 else:
125 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
165 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
126 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
166 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
127 l = [l1, l2, l3] + ["-" + e for e in a]
167 l = [l1, l2, l3] + ["-" + e for e in a]
128 else:
168 else:
129 al = splitnewlines(a)
169 al = splitnewlines(a)
130 bl = splitnewlines(b)
170 bl = splitnewlines(b)
131 l = list(_unidiff(a, b, al, bl, opts=opts))
171 l = list(_unidiff(a, b, al, bl, opts=opts))
132 if not l:
172 if not l:
133 return ""
173 return ""
134
174
135 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
175 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
136 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
176 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
137
177
138 for ln in xrange(len(l)):
178 for ln in xrange(len(l)):
139 if l[ln][-1] != '\n':
179 if l[ln][-1] != '\n':
140 l[ln] += "\n\ No newline at end of file\n"
180 l[ln] += "\n\ No newline at end of file\n"
141
181
142 if r:
182 if r:
143 l.insert(0, diffline(r, fn1, fn2, opts))
183 l.insert(0, diffline(r, fn1, fn2, opts))
144
184
145 return "".join(l)
185 return "".join(l)
146
186
147 # creates a headerless unified diff
187 # creates a headerless unified diff
148 # t1 and t2 are the text to be diffed
188 # t1 and t2 are the text to be diffed
149 # l1 and l2 are the text broken up into lines
189 # l1 and l2 are the text broken up into lines
150 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
190 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
151 def contextend(l, len):
191 def contextend(l, len):
152 ret = l + opts.context
192 ret = l + opts.context
153 if ret > len:
193 if ret > len:
154 ret = len
194 ret = len
155 return ret
195 return ret
156
196
157 def contextstart(l):
197 def contextstart(l):
158 ret = l - opts.context
198 ret = l - opts.context
159 if ret < 0:
199 if ret < 0:
160 return 0
200 return 0
161 return ret
201 return ret
162
202
163 lastfunc = [0, '']
203 lastfunc = [0, '']
164 def yieldhunk(hunk):
204 def yieldhunk(hunk):
165 (astart, a2, bstart, b2, delta) = hunk
205 (astart, a2, bstart, b2, delta) = hunk
166 aend = contextend(a2, len(l1))
206 aend = contextend(a2, len(l1))
167 alen = aend - astart
207 alen = aend - astart
168 blen = b2 - bstart + aend - a2
208 blen = b2 - bstart + aend - a2
169
209
170 func = ""
210 func = ""
171 if opts.showfunc:
211 if opts.showfunc:
172 lastpos, func = lastfunc
212 lastpos, func = lastfunc
173 # walk backwards from the start of the context up to the start of
213 # walk backwards from the start of the context up to the start of
174 # the previous hunk context until we find a line starting with an
214 # the previous hunk context until we find a line starting with an
175 # alphanumeric char.
215 # alphanumeric char.
176 for i in xrange(astart - 1, lastpos - 1, -1):
216 for i in xrange(astart - 1, lastpos - 1, -1):
177 if l1[i][0].isalnum():
217 if l1[i][0].isalnum():
178 func = ' ' + l1[i].rstrip()[:40]
218 func = ' ' + l1[i].rstrip()[:40]
179 lastfunc[1] = func
219 lastfunc[1] = func
180 break
220 break
181 # by recording this hunk's starting point as the next place to
221 # by recording this hunk's starting point as the next place to
182 # start looking for function lines, we avoid reading any line in
222 # start looking for function lines, we avoid reading any line in
183 # the file more than once.
223 # the file more than once.
184 lastfunc[0] = astart
224 lastfunc[0] = astart
185
225
186 # zero-length hunk ranges report their start line as one less
226 # zero-length hunk ranges report their start line as one less
187 if alen:
227 if alen:
188 astart += 1
228 astart += 1
189 if blen:
229 if blen:
190 bstart += 1
230 bstart += 1
191
231
192 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
232 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
193 bstart, blen, func)
233 bstart, blen, func)
194 for x in delta:
234 for x in delta:
195 yield x
235 yield x
196 for x in xrange(a2, aend):
236 for x in xrange(a2, aend):
197 yield ' ' + l1[x]
237 yield ' ' + l1[x]
198
238
199 # bdiff.blocks gives us the matching sequences in the files. The loop
239 # bdiff.blocks gives us the matching sequences in the files. The loop
200 # below finds the spaces between those matching sequences and translates
240 # below finds the spaces between those matching sequences and translates
201 # them into diff output.
241 # them into diff output.
202 #
242 #
203 if opts.ignorews or opts.ignorewsamount:
204 t1 = wsclean(opts, t1, False)
205 t2 = wsclean(opts, t2, False)
206
207 diff = bdiff.blocks(t1, t2)
208 hunk = None
243 hunk = None
209 for i, s1 in enumerate(diff):
244 for s in diffblocks(t1, t2, opts, l1, l2):
210 # The first match is special.
211 # we've either found a match starting at line 0 or a match later
212 # in the file. If it starts later, old and new below will both be
213 # empty and we'll continue to the next match.
214 if i > 0:
215 s = diff[i - 1]
216 else:
217 s = [0, 0, 0, 0]
218 delta = []
245 delta = []
219 a1 = s[1]
246 a1, a2, b1, b2 = s
220 a2 = s1[0]
221 b1 = s[3]
222 b2 = s1[2]
223
224 old = l1[a1:a2]
247 old = l1[a1:a2]
225 new = l2[b1:b2]
248 new = l2[b1:b2]
226
249
227 # bdiff sometimes gives huge matches past eof, this check eats them,
228 # and deals with the special first match case described above
229 if not old and not new:
230 continue
231
232 if opts.ignoreblanklines:
233 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
234 continue
235
236 astart = contextstart(a1)
250 astart = contextstart(a1)
237 bstart = contextstart(b1)
251 bstart = contextstart(b1)
238 prev = None
252 prev = None
239 if hunk:
253 if hunk:
240 # join with the previous hunk if it falls inside the context
254 # join with the previous hunk if it falls inside the context
241 if astart < hunk[1] + opts.context + 1:
255 if astart < hunk[1] + opts.context + 1:
242 prev = hunk
256 prev = hunk
243 astart = hunk[1]
257 astart = hunk[1]
244 bstart = hunk[3]
258 bstart = hunk[3]
245 else:
259 else:
246 for x in yieldhunk(hunk):
260 for x in yieldhunk(hunk):
247 yield x
261 yield x
248 if prev:
262 if prev:
249 # we've joined the previous hunk, record the new ending points.
263 # we've joined the previous hunk, record the new ending points.
250 hunk[1] = a2
264 hunk[1] = a2
251 hunk[3] = b2
265 hunk[3] = b2
252 delta = hunk[4]
266 delta = hunk[4]
253 else:
267 else:
254 # create a new hunk
268 # create a new hunk
255 hunk = [astart, a2, bstart, b2, delta]
269 hunk = [astart, a2, bstart, b2, delta]
256
270
257 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
271 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
258 delta[len(delta):] = ['-' + x for x in old]
272 delta[len(delta):] = ['-' + x for x in old]
259 delta[len(delta):] = ['+' + x for x in new]
273 delta[len(delta):] = ['+' + x for x in new]
260
274
261 if hunk:
275 if hunk:
262 for x in yieldhunk(hunk):
276 for x in yieldhunk(hunk):
263 yield x
277 yield x
264
278
265 def patchtext(bin):
279 def patchtext(bin):
266 pos = 0
280 pos = 0
267 t = []
281 t = []
268 while pos < len(bin):
282 while pos < len(bin):
269 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
283 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
270 pos += 12
284 pos += 12
271 t.append(bin[pos:pos + l])
285 t.append(bin[pos:pos + l])
272 pos += l
286 pos += l
273 return "".join(t)
287 return "".join(t)
274
288
275 def patch(a, bin):
289 def patch(a, bin):
276 if len(a) == 0:
290 if len(a) == 0:
277 # skip over trivial delta header
291 # skip over trivial delta header
278 return buffer(bin, 12)
292 return buffer(bin, 12)
279 return mpatch.patches(a, [bin])
293 return mpatch.patches(a, [bin])
280
294
281 # similar to difflib.SequenceMatcher.get_matching_blocks
295 # similar to difflib.SequenceMatcher.get_matching_blocks
282 def get_matching_blocks(a, b):
296 def get_matching_blocks(a, b):
283 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
297 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
284
298
285 def trivialdiffheader(length):
299 def trivialdiffheader(length):
286 return struct.pack(">lll", 0, 0, length)
300 return struct.pack(">lll", 0, 0, length)
287
301
288 patches = mpatch.patches
302 patches = mpatch.patches
289 patchedsize = mpatch.patchedsize
303 patchedsize = mpatch.patchedsize
290 textdiff = bdiff.bdiff
304 textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now