##// END OF EJS Templates
diff: fix obscure off-by-one error in diff -p
Dirkjan Ochtman -
r7436:07faba78 default
parent child Browse files
Show More
@@ -1,269 +1,269
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, re, struct, util
10 10
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing '\\n'.  A final fragment that
    is not newline-terminated is returned without one, and an empty
    text yields an empty list.
    '''
    pieces = text.split('\n')
    # split() always leaves whatever follows the last '\n' as the final
    # element; it is empty when the text ends with a newline (or is empty).
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
20 20
class diffopts(object):
    '''Container for the options that control diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank

    Any option that is not passed to the constructor, or is passed as
    None, takes its value from ``defaults``.
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        }

    # Only the known option names may be set on an instance.
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Store each known option, then coerce ``context`` to an int.

        Keyword arguments whose names are not in ``defaults`` are
        ignored.  Raises util.Abort when ``context`` cannot be
        converted by int() (ValueError).
        '''
        for k in self.__slots__:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)
56 56
# Module-wide diffopts instance holding only the default values; it is
# the default for the ``opts`` parameter of unidiff() and bunidiff().
defaultopts = diffopts()
58 58
def wsclean(opts, text):
    '''Normalize whitespace in text according to the ignore* options.

    opts.ignorews removes every run of spaces/tabs.  Otherwise
    opts.ignorewsamount collapses each run to a single space and then
    drops whitespace that directly precedes a newline.  Independently,
    opts.ignoreblanklines removes every newline character.  Returns
    the (possibly unchanged) text.
    '''
    if opts.ignorews:
        text = re.sub('[ \t]+', '', text)
    elif opts.ignorewsamount:
        text = re.sub('[ \t]+', ' ', text)
        text = re.sub('[ \t]+\n', '\n', text)
    if opts.ignoreblanklines:
        # the result is only compared for equality, so dropping all
        # newlines makes blank-line-only differences disappear
        text = re.sub('\n+', '', text)
    return text
68 68
def diffline(revs, a, b, opts):
    '''Build the "diff ..." command line that precedes a file's diff.

    With opts.git the line is "diff --git a/<a> b/<b>" and revs is not
    used.  Otherwise it is "diff", one "-r <rev>" per entry of revs (if
    any), then <a> only.  The result ends with a newline.
    '''
    parts = ['diff']
    if opts.git:
        parts.append('--git')
    if revs and not opts.git:
        parts.append(' '.join(["-r %s" % rev for rev in revs]))
    if opts.git:
        parts.append('a/%s' % a)
        parts.append('b/%s' % b)
    else:
        parts.append(a)
    return ' '.join(parts) + '\n'
81 81
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return a unified diff between texts a and b as a single string.

    a, b     old and new file contents; None selects a /dev/null header
             for that side
    ad, bd   date strings placed in the "---" / "+++" header lines
    fn1, fn2 old and new file names
    r        revisions passed to diffline(); when true, a "diff ..."
             line is put in front of the output
    opts     diffopts instance controlling the output

    Returns "" when both texts are empty, when two binary texts are
    equal, or when bunidiff() produces no output.
    '''
    def datetag(date, addtab=True):
        # Text that terminates a "---"/"+++" header line.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if addtab and ' ' in fn1:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # nothing on the old side: every line of b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch, False)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # nothing on the new side: every line of a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch, False)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
        if not l:
            return ""
        # difflib uses a space, rather than a tab
        # (the two header lines from bunidiff end in "\t\n"; replace
        # that with the date tag)
        l[0] = "%s%s" % (l[0][:-2], datetag(ad))
        l[1] = "%s%s" % (l[1][:-2], datetag(bd))

    # a line without a trailing newline can only be the last line of
    # one of the texts; flag it the way patch expects
    for ln in xrange(len(l)):
        if l[ln][-1] != '\n':
            l[ln] += "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
132 132
# somewhat self contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
    '''Generate the lines of a unified diff between t1 and t2.

    Yields the "---" and "+++" header lines (each ending in "\\t\\n")
    once, before the first hunk, then for every hunk an "@@" line
    followed by its context, removed and added lines.  Nothing at all
    is yielded when no hunk is produced.  opts.context sets the amount
    of context; opts.showfunc appends to each "@@" line the closest
    preceding line of l1 that starts with a word character.
    '''
    def contextend(l, length):
        # last line of trailing context, clamped to the end of the file
        ret = l + opts.context
        if ret > length:
            ret = length
        return ret

    def contextstart(l):
        # first line of leading context, clamped to the start of the file
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            # walk backwards from the line just before the start of the
            # context to find a line starting with an alphanumeric char.
            # (starting at astart itself would pick up a line that is
            # already part of the hunk)
            for x in xrange(astart - 1, -1, -1):
                t = l1[x].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

    if opts.showfunc:
        funcre = re.compile(r'\w')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i in xrange(len(diff)):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts at line 0, old and new below will both
        # be empty and we'll continue to the next match.
        if i > 0:
            s = diff[i-1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        s1 = diff[i]
        # the gap runs from the end of the previous match to the start
        # of this one, on both sides
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [ astart, a2, bstart, b2, delta ]

        delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
        delta[len(delta):] = [ '-' + x for x in old ]
        delta[len(delta):] = [ '+' + x for x in new ]

    if hunk:
        for x in yieldhunk(hunk, header):
            yield x
246 246
def patchtext(bin):
    '''Return the concatenated data of every fragment in a binary patch.

    The patch is read as a sequence of fragments, each a 12-byte header
    of three big-endian 32-bit integers followed by as many data bytes
    as the third integer says.  The first two header fields are not
    used here.  The result has the same string type as the input.
    Raises struct.error if a header is truncated.
    '''
    pos = 0
    t = []
    while pos < len(bin):
        p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    # join with an empty string of the input's own type
    return bin[:0].join(t)
256 256
def patch(a, bin):
    '''Apply the single binary patch bin to text a via mpatch.patches.'''
    return mpatch.patches(a, [bin])
259 259
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return the matching blocks of a and b as 3-tuples.

    Each tuple d from bdiff.blocks(a, b) is turned into
    (d[0], d[2], d[1] - d[0]); going by how bunidiff() reads the same
    tuples, that is (start in a, start in b, length of the match).
    '''
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
263 263
def trivialdiffheader(length):
    '''Return a 12-byte fragment header with fields (0, 0, length).

    The fields are packed as three big-endian 32-bit integers, the
    layout patchtext() reads.
    '''
    return struct.pack(">lll", 0, 0, length)
266 266
# Re-export the C extension entry points under this module's names.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
@@ -1,49 +1,58
#!/bin/sh

# Commit a nine-line file, then change its middle line in the working copy.
hg init repo
cd repo
cat > a <<EOF
c
c
a
a
b
a
a
c
c
EOF
hg ci -Am adda
cat > a <<EOF
c
c
a
a
dd
a
a
c
c
EOF

echo '% default context'
hg diff --nodates

echo '% invalid --unified'
hg diff --nodates -U foo

echo '% --unified=2'
hg diff --nodates -U 2

echo '% diff.unified=2'
hg --config diff.unified=2 diff --nodates

# Set the config value as well, so the step exercises what its label says:
# the command-line -U is given alongside diff.unified.
echo '% diff.unified=2 --unified=1'
hg --config diff.unified=2 diff --nodates -U 1

echo '% invalid diff.unified'
hg --config diff.unified=foo diff --nodates

# With no context lines, -p must not report a line of the hunk itself
# as the enclosing function.
echo '% test off-by-one error with diff -p'
hg init diffp
cd diffp
echo a > a
hg ci -Ama
rm a
echo b > a
echo a >> a
echo c >> a
hg diff -U0 -p

exit 0
General Comments 0
You need to be logged in to leave comments. Login now