##// END OF EJS Templates
mdiff: extract blocks whitespace normalization in diffblocks()...
Patrick Mezard -
r15525:935bf2e7 default
parent child Browse files
Show More
@@ -1,290 +1,304
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, util
10 10 import re, struct
11 11
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Each returned line keeps its trailing newline character. Only the
    last line may lack one, when text itself does not end with a newline.
    An empty text gives an empty list.
    '''
    lines = [line + '\n' for line in text.split('\n')]
    # str.split always returns at least one piece, so lines is never empty.
    # The last piece is whatever follows the final newline in text.
    if lines[-1] == '\n':
        # nothing followed the final newline (or text was empty): the
        # last entry is an artifact of the split, not a real line
        lines.pop()
    else:
        # text did not end with a newline: remove the one appended above
        lines[-1] = lines[-1][:-1]
    return lines
21 21
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
    }

    # one slot per known option; a tuple so the set of options cannot be
    # altered after the class is created
    __slots__ = tuple(defaults)

    def __init__(self, **opts):
        '''Set every known option from opts.

        An option that is missing from opts, or given as None, takes its
        value from defaults. Keys of opts that are not known options are
        not read. context is converted with int(); util.Abort is raised
        if that conversion fails.
        '''
        for k in self.__slots__:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except (TypeError, ValueError):
            # int() raises TypeError rather than ValueError for objects
            # that are neither numbers nor strings; report both alike
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with this object's option values,
        overridden by the given keyword arguments.'''
        opts = dict((k, getattr(self, k)) for k in self.defaults)
        opts.update(kwargs)
        return diffopts(**opts)
65 65
# options object with every option left at its default value
defaultopts = diffopts()
67 67
def wsclean(opts, text, blank=True):
    '''Return text with its whitespace normalized as requested by opts.

    opts.ignorews removes every run of spaces, tabs and carriage returns.
    Otherwise opts.ignorewsamount replaces each such run with one space
    and drops a space left directly before a newline.
    When blank is true and opts.ignoreblanklines is set, runs of newlines
    are then collapsed to a single newline and leading and trailing
    newlines are stripped.
    '''
    if opts.ignorews:
        text = re.sub('[ \t\r]+', '', text)
    elif opts.ignorewsamount:
        text = re.sub('[ \t\r]+', ' ', text)
        text = text.replace(' \n', '\n')
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    return text
77 77
def diffblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return changed blocks between text1 and text2, the blocks in-between
    those emitted by bdiff.blocks. Take into account the whitespace
    normalization rules defined by opts.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if they
    are already available.

    Each yielded block is a list [a1, a2, b1, b2]: lines1[a1:a2] is replaced
    by lines2[b1:b2].
    """
    if opts is None:
        opts = defaultopts
    if lines1 is None:
        lines1 = splitnewlines(text1)
    if lines2 is None:
        lines2 = splitnewlines(text2)
    if opts.ignorews or opts.ignorewsamount:
        # only the texts handed to bdiff are normalized; the line lists
        # keep the original content
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file. If it starts at line 0, old and new below will both be
    # empty and we'll continue to the next match.
    prev = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        # the changed block runs from the end of the previous match to
        # the start of this one, on both sides
        s = [prev[1], match[0], prev[3], match[2]]
        prev = match
        old = lines1[s[0]:s[1]]
        new = lines2[s[2]:s[3]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue
        yield s
117
def diffline(revs, a, b, opts):
    '''Return the "diff ..." header line for a file diff.

    With opts.git the line is "diff --git a/<a> b/<b>" and revs is not
    used. Otherwise it is "diff", one "-r <rev>" per entry of revs (if
    any), then a alone; b is not used in that form.
    The returned line ends with a newline.
    '''
    parts = ['diff']
    if opts.git:
        parts.extend(['--git', 'a/%s' % a, 'b/%s' % b])
    else:
        if revs:
            parts.append(' '.join(["-r %s" % rev for rev in revs]))
        parts.append(a)
    return ' '.join(parts) + '\n'
90 130
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return a unified diff between texts a and b as a single string.

    ad and bd are the dates put in the "---" and "+++" header lines, fn1
    and fn2 the file names used there. If r is set, a diffline() header
    built from it is put first. An empty string is returned when both
    texts are empty, when two binary texts are equal, or when the line
    diff finds nothing to report.
    '''
    def datetag(date, addtab=True):
        # text that ends a "---"/"+++" header line after the file name
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if addtab and ' ' in fn1:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # nothing on the old side: every line of b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch, False)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # nothing on the new side: every line of a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch, False)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
        l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))

    # a line without a final newline can only come from the end of one of
    # the texts; flag it the way unified diffs do
    for ln, line in enumerate(l):
        if line[-1] != '\n':
            l[ln] = line + "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
146 186
# creates a headerless unified diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    '''Yield the hunk header lines and hunk body lines of the unified
    diff between t1 and t2, without the "---"/"+++" file header.'''
    def contextend(l, limit):
        # last context line after position l, clamped to limit
        return min(l + opts.context, limit)

    def contextstart(l):
        # first context line before position l, clamped to the file start
        return max(l - opts.context, 0)

    # [position where the next function-line search stops, last function
    # line found]; a list so that yieldhunk can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                           bstart, blen, func)
        for x in delta:
            yield x
        # trailing context
        for x in l1[a2:aend]:
            yield ' ' + x

    # diffblocks gives us the changed blocks between the files, i.e. the
    # spaces between the matching sequences found by bdiff.blocks. The loop
    # below translates them into diff output.
    #
    hunk = None
    for s in diffblocks(t1, t2, opts, l1, l2):
        delta = []
        a1, a2, b1, b2 = s
        old = l1[a1:a2]
        new = l2[b1:b2]

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # delta may be the joined hunk's own list, so grow it in place
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk):
            yield x
264 278
def patchtext(bin):
    '''Return the concatenated data of every fragment in the delta bin.

    bin is read as a sequence of fragments. Each one is a header of three
    big-endian 32-bit integers followed by as many data bytes as the
    third integer says. The first two integers are not used here.
    The result has the same string type as bin.
    '''
    pos = 0
    t = []
    while pos < len(bin):
        _p1, _p2, length = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        t.append(bin[pos:pos + length])
        pos += length
    # join with an empty string of the input's own type so that both
    # str and bytes deltas are handled
    return bin[:0].join(t)
274 288
def patch(a, bin):
    '''Return the result of applying the delta bin to the text a.

    When a is empty the result is bin without its first 12 bytes,
    returned as a buffer object; otherwise mpatch.patches does the work.
    '''
    if len(a) == 0:
        # skip over trivial delta header
        return buffer(bin, 12)
    return mpatch.patches(a, [bin])
280 294
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return the blocks of bdiff.blocks(a, b) as a list of tuples
    (d[0], d[2], d[1] - d[0]) for each block d.'''
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
284 298
def trivialdiffheader(length):
    '''Return a 12-byte delta fragment header: three big-endian 32-bit
    integers 0, 0 and length.'''
    return struct.pack(">lll", 0, 0, length)
287 301
# make these mpatch and bdiff functions available under this module's names
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now