##// END OF EJS Templates
mdiff: make diffblocks() return all blocks, matching and changed...
Patrick Mezard -
r15526:e6519c62 default
parent child Browse files
Show More
@@ -1,304 +1,309 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, util
10 10 import re, struct
11 11
def splitnewlines(text):
    '''Split text into lines on the newline character only.

    Unlike str.splitlines, no other line separator is recognized. Every
    returned line keeps its trailing newline; a final line that is not
    newline-terminated is returned as-is. Empty text gives an empty list.
    '''
    pieces = text.split('\n')
    # Whatever follows the last newline; empty when text ends with one.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
21 21
class diffopts(object):
    '''Container for the options controlling diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Value used for every option that is missing or passed as None.
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
    }

    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Set every known option from opts, falling back to defaults.

        Keyword arguments that are not known options are ignored. Raises
        util.Abort when context cannot be converted to an integer.
        '''
        for name in self.__slots__:
            value = opts.get(name)
            setattr(self, name,
                    self.defaults[name] if value is None else value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the same values, overridden by kwargs.'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)

# Module-wide options instance built purely from the defaults.
defaultopts = diffopts()
67 67
def wsclean(opts, text, blank=True):
    '''Normalize whitespace in text according to opts.

    With opts.ignorews, every run of spaces, tabs and carriage returns is
    removed. Otherwise, with opts.ignorewsamount, each such run becomes a
    single space and a space directly before a newline is dropped.
    When blank is true and opts.ignoreblanklines is set, runs of newlines
    are then squeezed to one and leading/trailing newlines are stripped.
    '''
    if opts.ignorews:
        cleaned = re.sub('[ \t\r]+', '', text)
    elif opts.ignorewsamount:
        collapsed = re.sub('[ \t\r]+', ' ', text)
        cleaned = collapsed.replace(' \n', '\n')
    else:
        cleaned = text

    if not (blank and opts.ignoreblanklines):
        return cleaned
    return re.sub('\n+', '\n', cleaned).strip('\n')
77 77
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) tuples covering text1 and text2.

    block is a 4-item line range [a1, a2, b1, b2]: lines1[a1:a2] on the
    first side and lines2[b1:b2] on the second. type is:

    '=' for blocks matching exactly one another (bdiff blocks),
    '!' for non-matching blocks,
    '~' for blocks matching only after having filtered blank lines
        (only produced when opts.ignoreblanklines is set).

    The whitespace normalization rules defined by opts are applied to the
    texts before they are handed to bdiff.blocks. opts defaults to
    defaultopts. lines1 and lines2 are text1 and text2 split with
    splitnewlines(); pass them if they are already available.
    """
    if opts is None:
        opts = defaultopts
    if lines1 is None:
        lines1 = splitnewlines(text1)
    if lines2 is None:
        lines2 = splitnewlines(text2)
    if opts.ignorews or opts.ignorewsamount:
        # blank=False: blank-line filtering is done per block below so the
        # line numbering seen by bdiff still matches lines1/lines2.
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # The first match is special: it either starts at line 0 or later in
    # the file. Using an all-zero previous block makes the gap before it
    # start at the top of both files.
    prev = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        # The gap between the end of the previous match and the start of
        # this one is the candidate changed block.
        gap = [prev[1], match[0], prev[3], match[2]]
        old = lines1[gap[0]:gap[1]]
        new = lines2[gap[2]:gap[3]]

        # bdiff sometimes gives huge matches past eof; an empty gap on both
        # sides is skipped, which also handles a first match at line 0.
        if old or new:
            btype = '!'
            if opts.ignoreblanklines:
                cold = wsclean(opts, "".join(old))
                cnew = wsclean(opts, "".join(new))
                if cold == cnew:
                    btype = '~'
            yield gap, btype
        yield match, '='
        prev = match
117 120
def diffline(revs, a, b, opts):
    '''Build the "diff ..." command line that precedes a file's diff.

    With opts.git the result is "diff --git a/<a> b/<b>" and revs is not
    used. Otherwise it is "diff", an optional "-r <rev>" per entry of
    revs, then <a>. The returned string ends with a newline.
    '''
    if opts.git:
        words = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
    else:
        words = ['diff']
        if revs:
            words.append(' '.join(["-r %s" % rev for rev in revs]))
        words.append(a)
    return ' '.join(words) + '\n'
130 133
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    """Return a unified diff between a and b as a single string.

    a and b are the old and new contents; ad and bd are the dates put in
    the '---' and '+++' headers; fn1 and fn2 are the file names. When r
    is true, a diffline() built from r is prepended.

    An empty string is returned when both a and b are empty, when binary
    contents are identical, or when no hunk is produced. If a (resp. b)
    is None, the corresponding header names /dev/null instead of the
    file. Binary contents (unless opts.text) only yield a one-line
    'Binary file ... has changed' notice.
    """
    def datetag(date, addtab=True):
        # Header suffix: the date, unless git/nodates disable it, in which
        # case a bare tab is emitted after names containing a space.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if addtab and ' ' in fn1:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # Old side empty: everything in b is an addition.
        added = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch, False)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -0,0 +1,%d @@\n" % len(added)
        l = [l1, l2, l3] + ["+" + e for e in added]
    elif not b:
        # New side empty: everything in a is a removal.
        removed = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch, False)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -1,%d +0,0 @@\n" % len(removed)
        l = [l1, l2, l3] + ["-" + e for e in removed]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
        l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))

    # Any line lacking a terminating newline gets the standard marker.
    # The backslash is written as an explicit escape; the resulting text
    # is unchanged.
    for ln, line in enumerate(l):
        if not line.endswith('\n'):
            l[ln] = line + "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
186 189
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    """Generate the lines of a headerless unified diff.

    t1 and t2 are the texts to be diffed; l1 and l2 are the same texts
    broken up into lines. Each hunk is yielded as its '@@ ... @@' header
    followed by its context, removed ('-') and added ('+') lines.
    Only the '!' blocks reported by allblocks() produce output.
    """
    def contextend(l, limit):
        # End of the trailing context after line l, clamped to limit.
        return min(l + opts.context, limit)

    def contextstart(l):
        # Start of the leading context before line l, clamped to 0.
        return max(l - opts.context, 0)

    # [position where the next function-line search stops, last function
    # text found]; shared across hunks by yieldhunk().
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                           bstart, blen, func)
        for x in delta:
            yield x
        # trailing context
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    # allblocks gives us both the matching sequences and the spaces between
    # them. The loop below keeps only the changed ('!') blocks and
    # translates them into diff output.
    hunk = None
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        if stype != '!':
            continue
        delta = []
        a1, a2, b1, b2 = s
        old = l1[a1:a2]
        new = l2[b1:b2]

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading context (or the lines between two joined changes), then
        # the removed and added lines
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk):
            yield x
278 283
def patchtext(bin):
    """Return the concatenated data of every fragment in a binary patch.

    bin is a sequence of fragments, each made of a 12-byte header (three
    big-endian signed 32-bit integers, the last one being the length of
    the data) followed by that many bytes of data. The two first header
    fields are not used here.

    Raises struct.error if a header is truncated.
    """
    pos = 0
    end = len(bin)
    chunks = []
    while pos < end:
        _p1, _p2, size = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        chunks.append(bin[pos:pos + size])
        pos += size
    # Join with an empty value of the input's own string type so the
    # result has the same type as the fragments (str or bytes).
    return bin[:0].join(chunks)
288 293
def patch(a, bin):
    '''Apply the binary delta bin to the text a and return the result.

    When a is empty, the first 12 bytes of bin are skipped and the
    remainder is returned as a buffer, without calling mpatch.
    '''
    if len(a):
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return buffer(bin, 12)
294 299
def get_matching_blocks(a, b):
    '''Return the blocks of bdiff.blocks(a, b) as 3-item tuples.

    Similar to difflib.SequenceMatcher.get_matching_blocks: each bdiff
    block d becomes (d[0], d[2], d[1] - d[0]).
    '''
    matches = []
    for blk in bdiff.blocks(a, b):
        first, last, other = blk[0], blk[1], blk[2]
        matches.append((first, other, last - first))
    return matches
298 303
def trivialdiffheader(length):
    '''Return a 12-byte delta header packing 0, 0 and length.

    The three values are packed as big-endian signed 32-bit integers.
    '''
    fields = (0, 0, length)
    return struct.pack(">lll", *fields)
301 306
# Module-level aliases for functions provided by the mpatch and bdiff
# modules.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now