##// END OF EJS Templates
diff: fix obscure off-by-one error in diff -p
Dirkjan Ochtman -
r7436:07faba78 default
parent child Browse files
Show More
@@ -1,269 +1,269 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, re, struct, util
9 import bdiff, mpatch, re, struct, util
10
10
11 def splitnewlines(text):
11 def splitnewlines(text):
12 '''like str.splitlines, but only split on newlines.'''
12 '''like str.splitlines, but only split on newlines.'''
13 lines = [l + '\n' for l in text.split('\n')]
13 lines = [l + '\n' for l in text.split('\n')]
14 if lines:
14 if lines:
15 if lines[-1] == '\n':
15 if lines[-1] == '\n':
16 lines.pop()
16 lines.pop()
17 else:
17 else:
18 lines[-1] = lines[-1][:-1]
18 lines[-1] = lines[-1][:-1]
19 return lines
19 return lines
20
20
21 class diffopts(object):
21 class diffopts(object):
22 '''context is the number of context lines
22 '''context is the number of context lines
23 text treats all files as text
23 text treats all files as text
24 showfunc enables diff -p output
24 showfunc enables diff -p output
25 git enables the git extended patch format
25 git enables the git extended patch format
26 nodates removes dates from diff headers
26 nodates removes dates from diff headers
27 ignorews ignores all whitespace changes in the diff
27 ignorews ignores all whitespace changes in the diff
28 ignorewsamount ignores changes in the amount of whitespace
28 ignorewsamount ignores changes in the amount of whitespace
29 ignoreblanklines ignores changes whose lines are all blank'''
29 ignoreblanklines ignores changes whose lines are all blank'''
30
30
31 defaults = {
31 defaults = {
32 'context': 3,
32 'context': 3,
33 'text': False,
33 'text': False,
34 'showfunc': False,
34 'showfunc': False,
35 'git': False,
35 'git': False,
36 'nodates': False,
36 'nodates': False,
37 'ignorews': False,
37 'ignorews': False,
38 'ignorewsamount': False,
38 'ignorewsamount': False,
39 'ignoreblanklines': False,
39 'ignoreblanklines': False,
40 }
40 }
41
41
42 __slots__ = defaults.keys()
42 __slots__ = defaults.keys()
43
43
44 def __init__(self, **opts):
44 def __init__(self, **opts):
45 for k in self.__slots__:
45 for k in self.__slots__:
46 v = opts.get(k)
46 v = opts.get(k)
47 if v is None:
47 if v is None:
48 v = self.defaults[k]
48 v = self.defaults[k]
49 setattr(self, k, v)
49 setattr(self, k, v)
50
50
51 try:
51 try:
52 self.context = int(self.context)
52 self.context = int(self.context)
53 except ValueError:
53 except ValueError:
54 raise util.Abort(_('diff context lines count must be '
54 raise util.Abort(_('diff context lines count must be '
55 'an integer, not %r') % self.context)
55 'an integer, not %r') % self.context)
56
56
57 defaultopts = diffopts()
57 defaultopts = diffopts()
58
58
59 def wsclean(opts, text):
59 def wsclean(opts, text):
60 if opts.ignorews:
60 if opts.ignorews:
61 text = re.sub('[ \t]+', '', text)
61 text = re.sub('[ \t]+', '', text)
62 elif opts.ignorewsamount:
62 elif opts.ignorewsamount:
63 text = re.sub('[ \t]+', ' ', text)
63 text = re.sub('[ \t]+', ' ', text)
64 text = re.sub('[ \t]+\n', '\n', text)
64 text = re.sub('[ \t]+\n', '\n', text)
65 if opts.ignoreblanklines:
65 if opts.ignoreblanklines:
66 text = re.sub('\n+', '', text)
66 text = re.sub('\n+', '', text)
67 return text
67 return text
68
68
69 def diffline(revs, a, b, opts):
69 def diffline(revs, a, b, opts):
70 parts = ['diff']
70 parts = ['diff']
71 if opts.git:
71 if opts.git:
72 parts.append('--git')
72 parts.append('--git')
73 if revs and not opts.git:
73 if revs and not opts.git:
74 parts.append(' '.join(["-r %s" % rev for rev in revs]))
74 parts.append(' '.join(["-r %s" % rev for rev in revs]))
75 if opts.git:
75 if opts.git:
76 parts.append('a/%s' % a)
76 parts.append('a/%s' % a)
77 parts.append('b/%s' % b)
77 parts.append('b/%s' % b)
78 else:
78 else:
79 parts.append(a)
79 parts.append(a)
80 return ' '.join(parts) + '\n'
80 return ' '.join(parts) + '\n'
81
81
82 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
82 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
83 def datetag(date, addtab=True):
83 def datetag(date, addtab=True):
84 if not opts.git and not opts.nodates:
84 if not opts.git and not opts.nodates:
85 return '\t%s\n' % date
85 return '\t%s\n' % date
86 if addtab and ' ' in fn1:
86 if addtab and ' ' in fn1:
87 return '\t\n'
87 return '\t\n'
88 return '\n'
88 return '\n'
89
89
90 if not a and not b: return ""
90 if not a and not b: return ""
91 epoch = util.datestr((0, 0))
91 epoch = util.datestr((0, 0))
92
92
93 if not opts.text and (util.binary(a) or util.binary(b)):
93 if not opts.text and (util.binary(a) or util.binary(b)):
94 if a and b and len(a) == len(b) and a == b:
94 if a and b and len(a) == len(b) and a == b:
95 return ""
95 return ""
96 l = ['Binary file %s has changed\n' % fn1]
96 l = ['Binary file %s has changed\n' % fn1]
97 elif not a:
97 elif not a:
98 b = splitnewlines(b)
98 b = splitnewlines(b)
99 if a is None:
99 if a is None:
100 l1 = '--- /dev/null%s' % datetag(epoch, False)
100 l1 = '--- /dev/null%s' % datetag(epoch, False)
101 else:
101 else:
102 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
102 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
103 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
103 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
104 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
104 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
105 l = [l1, l2, l3] + ["+" + e for e in b]
105 l = [l1, l2, l3] + ["+" + e for e in b]
106 elif not b:
106 elif not b:
107 a = splitnewlines(a)
107 a = splitnewlines(a)
108 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
108 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
109 if b is None:
109 if b is None:
110 l2 = '+++ /dev/null%s' % datetag(epoch, False)
110 l2 = '+++ /dev/null%s' % datetag(epoch, False)
111 else:
111 else:
112 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
112 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
113 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
113 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
114 l = [l1, l2, l3] + ["-" + e for e in a]
114 l = [l1, l2, l3] + ["-" + e for e in a]
115 else:
115 else:
116 al = splitnewlines(a)
116 al = splitnewlines(a)
117 bl = splitnewlines(b)
117 bl = splitnewlines(b)
118 l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
118 l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
119 if not l: return ""
119 if not l: return ""
120 # difflib uses a space, rather than a tab
120 # difflib uses a space, rather than a tab
121 l[0] = "%s%s" % (l[0][:-2], datetag(ad))
121 l[0] = "%s%s" % (l[0][:-2], datetag(ad))
122 l[1] = "%s%s" % (l[1][:-2], datetag(bd))
122 l[1] = "%s%s" % (l[1][:-2], datetag(bd))
123
123
124 for ln in xrange(len(l)):
124 for ln in xrange(len(l)):
125 if l[ln][-1] != '\n':
125 if l[ln][-1] != '\n':
126 l[ln] += "\n\ No newline at end of file\n"
126 l[ln] += "\n\ No newline at end of file\n"
127
127
128 if r:
128 if r:
129 l.insert(0, diffline(r, fn1, fn2, opts))
129 l.insert(0, diffline(r, fn1, fn2, opts))
130
130
131 return "".join(l)
131 return "".join(l)
132
132
133 # somewhat self contained replacement for difflib.unified_diff
133 # somewhat self contained replacement for difflib.unified_diff
134 # t1 and t2 are the text to be diffed
134 # t1 and t2 are the text to be diffed
135 # l1 and l2 are the text broken up into lines
135 # l1 and l2 are the text broken up into lines
136 # header1 and header2 are the filenames for the diff output
136 # header1 and header2 are the filenames for the diff output
137 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
137 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
138 def contextend(l, len):
138 def contextend(l, len):
139 ret = l + opts.context
139 ret = l + opts.context
140 if ret > len:
140 if ret > len:
141 ret = len
141 ret = len
142 return ret
142 return ret
143
143
144 def contextstart(l):
144 def contextstart(l):
145 ret = l - opts.context
145 ret = l - opts.context
146 if ret < 0:
146 if ret < 0:
147 return 0
147 return 0
148 return ret
148 return ret
149
149
150 def yieldhunk(hunk, header):
150 def yieldhunk(hunk, header):
151 if header:
151 if header:
152 for x in header:
152 for x in header:
153 yield x
153 yield x
154 (astart, a2, bstart, b2, delta) = hunk
154 (astart, a2, bstart, b2, delta) = hunk
155 aend = contextend(a2, len(l1))
155 aend = contextend(a2, len(l1))
156 alen = aend - astart
156 alen = aend - astart
157 blen = b2 - bstart + aend - a2
157 blen = b2 - bstart + aend - a2
158
158
159 func = ""
159 func = ""
160 if opts.showfunc:
160 if opts.showfunc:
161 # walk backwards from the start of the context
161 # walk backwards from the start of the context
162 # to find a line starting with an alphanumeric char.
162 # to find a line starting with an alphanumeric char.
163 for x in xrange(astart, -1, -1):
163 for x in xrange(astart - 1, -1, -1):
164 t = l1[x].rstrip()
164 t = l1[x].rstrip()
165 if funcre.match(t):
165 if funcre.match(t):
166 func = ' ' + t[:40]
166 func = ' ' + t[:40]
167 break
167 break
168
168
169 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
169 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
170 bstart + 1, blen, func)
170 bstart + 1, blen, func)
171 for x in delta:
171 for x in delta:
172 yield x
172 yield x
173 for x in xrange(a2, aend):
173 for x in xrange(a2, aend):
174 yield ' ' + l1[x]
174 yield ' ' + l1[x]
175
175
176 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
176 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
177
177
178 if opts.showfunc:
178 if opts.showfunc:
179 funcre = re.compile('\w')
179 funcre = re.compile('\w')
180
180
181 # bdiff.blocks gives us the matching sequences in the files. The loop
181 # bdiff.blocks gives us the matching sequences in the files. The loop
182 # below finds the spaces between those matching sequences and translates
182 # below finds the spaces between those matching sequences and translates
183 # them into diff output.
183 # them into diff output.
184 #
184 #
185 diff = bdiff.blocks(t1, t2)
185 diff = bdiff.blocks(t1, t2)
186 hunk = None
186 hunk = None
187 for i in xrange(len(diff)):
187 for i in xrange(len(diff)):
188 # The first match is special.
188 # The first match is special.
189 # we've either found a match starting at line 0 or a match later
189 # we've either found a match starting at line 0 or a match later
190 # in the file. If it starts later, old and new below will both be
190 # in the file. If it starts later, old and new below will both be
191 # empty and we'll continue to the next match.
191 # empty and we'll continue to the next match.
192 if i > 0:
192 if i > 0:
193 s = diff[i-1]
193 s = diff[i-1]
194 else:
194 else:
195 s = [0, 0, 0, 0]
195 s = [0, 0, 0, 0]
196 delta = []
196 delta = []
197 s1 = diff[i]
197 s1 = diff[i]
198 a1 = s[1]
198 a1 = s[1]
199 a2 = s1[0]
199 a2 = s1[0]
200 b1 = s[3]
200 b1 = s[3]
201 b2 = s1[2]
201 b2 = s1[2]
202
202
203 old = l1[a1:a2]
203 old = l1[a1:a2]
204 new = l2[b1:b2]
204 new = l2[b1:b2]
205
205
206 # bdiff sometimes gives huge matches past eof, this check eats them,
206 # bdiff sometimes gives huge matches past eof, this check eats them,
207 # and deals with the special first match case described above
207 # and deals with the special first match case described above
208 if not old and not new:
208 if not old and not new:
209 continue
209 continue
210
210
211 if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
211 if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
212 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
212 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
213 continue
213 continue
214
214
215 astart = contextstart(a1)
215 astart = contextstart(a1)
216 bstart = contextstart(b1)
216 bstart = contextstart(b1)
217 prev = None
217 prev = None
218 if hunk:
218 if hunk:
219 # join with the previous hunk if it falls inside the context
219 # join with the previous hunk if it falls inside the context
220 if astart < hunk[1] + opts.context + 1:
220 if astart < hunk[1] + opts.context + 1:
221 prev = hunk
221 prev = hunk
222 astart = hunk[1]
222 astart = hunk[1]
223 bstart = hunk[3]
223 bstart = hunk[3]
224 else:
224 else:
225 for x in yieldhunk(hunk, header):
225 for x in yieldhunk(hunk, header):
226 yield x
226 yield x
227 # we only want to yield the header if the files differ, and
227 # we only want to yield the header if the files differ, and
228 # we only want to yield it once.
228 # we only want to yield it once.
229 header = None
229 header = None
230 if prev:
230 if prev:
231 # we've joined the previous hunk, record the new ending points.
231 # we've joined the previous hunk, record the new ending points.
232 hunk[1] = a2
232 hunk[1] = a2
233 hunk[3] = b2
233 hunk[3] = b2
234 delta = hunk[4]
234 delta = hunk[4]
235 else:
235 else:
236 # create a new hunk
236 # create a new hunk
237 hunk = [ astart, a2, bstart, b2, delta ]
237 hunk = [ astart, a2, bstart, b2, delta ]
238
238
239 delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
239 delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
240 delta[len(delta):] = [ '-' + x for x in old ]
240 delta[len(delta):] = [ '-' + x for x in old ]
241 delta[len(delta):] = [ '+' + x for x in new ]
241 delta[len(delta):] = [ '+' + x for x in new ]
242
242
243 if hunk:
243 if hunk:
244 for x in yieldhunk(hunk, header):
244 for x in yieldhunk(hunk, header):
245 yield x
245 yield x
246
246
247 def patchtext(bin):
247 def patchtext(bin):
248 pos = 0
248 pos = 0
249 t = []
249 t = []
250 while pos < len(bin):
250 while pos < len(bin):
251 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
251 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
252 pos += 12
252 pos += 12
253 t.append(bin[pos:pos + l])
253 t.append(bin[pos:pos + l])
254 pos += l
254 pos += l
255 return "".join(t)
255 return "".join(t)
256
256
257 def patch(a, bin):
257 def patch(a, bin):
258 return mpatch.patches(a, [bin])
258 return mpatch.patches(a, [bin])
259
259
260 # similar to difflib.SequenceMatcher.get_matching_blocks
260 # similar to difflib.SequenceMatcher.get_matching_blocks
261 def get_matching_blocks(a, b):
261 def get_matching_blocks(a, b):
262 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
262 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
263
263
264 def trivialdiffheader(length):
264 def trivialdiffheader(length):
265 return struct.pack(">lll", 0, 0, length)
265 return struct.pack(">lll", 0, 0, length)
266
266
267 patches = mpatch.patches
267 patches = mpatch.patches
268 patchedsize = mpatch.patchedsize
268 patchedsize = mpatch.patchedsize
269 textdiff = bdiff.bdiff
269 textdiff = bdiff.bdiff
@@ -1,49 +1,58 b''
1 #!/bin/sh
1 #!/bin/sh
2
2
3 hg init repo
3 hg init repo
4 cd repo
4 cd repo
5 cat > a <<EOF
5 cat > a <<EOF
6 c
6 c
7 c
7 c
8 a
8 a
9 a
9 a
10 b
10 b
11 a
11 a
12 a
12 a
13 c
13 c
14 c
14 c
15 EOF
15 EOF
16 hg ci -Am adda
16 hg ci -Am adda
17 cat > a <<EOF
17 cat > a <<EOF
18 c
18 c
19 c
19 c
20 a
20 a
21 a
21 a
22 dd
22 dd
23 a
23 a
24 a
24 a
25 c
25 c
26 c
26 c
27 EOF
27 EOF
28
28
29 echo '% default context'
29 echo '% default context'
30 hg diff --nodates
30 hg diff --nodates
31
31
32 echo '% invalid --unified'
32 echo '% invalid --unified'
33 hg diff --nodates -U foo
33 hg diff --nodates -U foo
34
34
35 echo '% --unified=2'
35 echo '% --unified=2'
36 hg diff --nodates -U 2
36 hg diff --nodates -U 2
37
37
38 echo '% diff.unified=2'
38 echo '% diff.unified=2'
39 hg --config diff.unified=2 diff --nodates
39 hg --config diff.unified=2 diff --nodates
40
40
41 echo '% diff.unified=2 --unified=1'
41 echo '% diff.unified=2 --unified=1'
42 hg diff --nodates -U 1
42 hg diff --nodates -U 1
43
43
44 echo '% invalid diff.unified'
44 echo '% invalid diff.unified'
45 hg --config diff.unified=foo diff --nodates
45 hg --config diff.unified=foo diff --nodates
46
46
47 exit 0
47 echo % test off-by-one error with diff -p
48 hg init diffp
49 cd diffp
50 echo a > a
51 hg ci -Ama
52 rm a
53 echo b > a
54 echo a >> a
55 echo c >> a
56 hg diff -U0 -p
48
57
49
58 exit 0
General Comments 0
You need to be logged in to leave comments. Login now