##// END OF EJS Templates
remove header handling out of mdiff.bunidiff, rename it
Benoit Boissinot -
r10614:d0050f36 default
parent child Browse files
Show More
@@ -1,283 +1,274
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, util
9 import bdiff, mpatch, util
10 import re, struct
10 import re, struct
11
11
12 def splitnewlines(text):
12 def splitnewlines(text):
13 '''like str.splitlines, but only split on newlines.'''
13 '''like str.splitlines, but only split on newlines.'''
14 lines = [l + '\n' for l in text.split('\n')]
14 lines = [l + '\n' for l in text.split('\n')]
15 if lines:
15 if lines:
16 if lines[-1] == '\n':
16 if lines[-1] == '\n':
17 lines.pop()
17 lines.pop()
18 else:
18 else:
19 lines[-1] = lines[-1][:-1]
19 lines[-1] = lines[-1][:-1]
20 return lines
20 return lines
21
21
22 class diffopts(object):
22 class diffopts(object):
23 '''context is the number of context lines
23 '''context is the number of context lines
24 text treats all files as text
24 text treats all files as text
25 showfunc enables diff -p output
25 showfunc enables diff -p output
26 git enables the git extended patch format
26 git enables the git extended patch format
27 nodates removes dates from diff headers
27 nodates removes dates from diff headers
28 ignorews ignores all whitespace changes in the diff
28 ignorews ignores all whitespace changes in the diff
29 ignorewsamount ignores changes in the amount of whitespace
29 ignorewsamount ignores changes in the amount of whitespace
30 ignoreblanklines ignores changes whose lines are all blank
30 ignoreblanklines ignores changes whose lines are all blank
31 upgrade generates git diffs to avoid data loss
31 upgrade generates git diffs to avoid data loss
32 '''
32 '''
33
33
34 defaults = {
34 defaults = {
35 'context': 3,
35 'context': 3,
36 'text': False,
36 'text': False,
37 'showfunc': False,
37 'showfunc': False,
38 'git': False,
38 'git': False,
39 'nodates': False,
39 'nodates': False,
40 'ignorews': False,
40 'ignorews': False,
41 'ignorewsamount': False,
41 'ignorewsamount': False,
42 'ignoreblanklines': False,
42 'ignoreblanklines': False,
43 'upgrade': False,
43 'upgrade': False,
44 }
44 }
45
45
46 __slots__ = defaults.keys()
46 __slots__ = defaults.keys()
47
47
48 def __init__(self, **opts):
48 def __init__(self, **opts):
49 for k in self.__slots__:
49 for k in self.__slots__:
50 v = opts.get(k)
50 v = opts.get(k)
51 if v is None:
51 if v is None:
52 v = self.defaults[k]
52 v = self.defaults[k]
53 setattr(self, k, v)
53 setattr(self, k, v)
54
54
55 try:
55 try:
56 self.context = int(self.context)
56 self.context = int(self.context)
57 except ValueError:
57 except ValueError:
58 raise util.Abort(_('diff context lines count must be '
58 raise util.Abort(_('diff context lines count must be '
59 'an integer, not %r') % self.context)
59 'an integer, not %r') % self.context)
60
60
61 def copy(self, **kwargs):
61 def copy(self, **kwargs):
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
63 opts.update(kwargs)
63 opts.update(kwargs)
64 return diffopts(**opts)
64 return diffopts(**opts)
65
65
66 defaultopts = diffopts()
66 defaultopts = diffopts()
67
67
68 def wsclean(opts, text, blank=True):
68 def wsclean(opts, text, blank=True):
69 if opts.ignorews:
69 if opts.ignorews:
70 text = re.sub('[ \t]+', '', text)
70 text = re.sub('[ \t]+', '', text)
71 elif opts.ignorewsamount:
71 elif opts.ignorewsamount:
72 text = re.sub('[ \t]+', ' ', text)
72 text = re.sub('[ \t]+', ' ', text)
73 text = re.sub('[ \t]+\n', '\n', text)
73 text = re.sub('[ \t]+\n', '\n', text)
74 if blank and opts.ignoreblanklines:
74 if blank and opts.ignoreblanklines:
75 text = re.sub('\n+', '', text)
75 text = re.sub('\n+', '', text)
76 return text
76 return text
77
77
78 def diffline(revs, a, b, opts):
78 def diffline(revs, a, b, opts):
79 parts = ['diff']
79 parts = ['diff']
80 if opts.git:
80 if opts.git:
81 parts.append('--git')
81 parts.append('--git')
82 if revs and not opts.git:
82 if revs and not opts.git:
83 parts.append(' '.join(["-r %s" % rev for rev in revs]))
83 parts.append(' '.join(["-r %s" % rev for rev in revs]))
84 if opts.git:
84 if opts.git:
85 parts.append('a/%s' % a)
85 parts.append('a/%s' % a)
86 parts.append('b/%s' % b)
86 parts.append('b/%s' % b)
87 else:
87 else:
88 parts.append(a)
88 parts.append(a)
89 return ' '.join(parts) + '\n'
89 return ' '.join(parts) + '\n'
90
90
91 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
91 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
92 def datetag(date, addtab=True):
92 def datetag(date, addtab=True):
93 if not opts.git and not opts.nodates:
93 if not opts.git and not opts.nodates:
94 return '\t%s\n' % date
94 return '\t%s\n' % date
95 if addtab and ' ' in fn1:
95 if addtab and ' ' in fn1:
96 return '\t\n'
96 return '\t\n'
97 return '\n'
97 return '\n'
98
98
99 if not a and not b:
99 if not a and not b:
100 return ""
100 return ""
101 epoch = util.datestr((0, 0))
101 epoch = util.datestr((0, 0))
102
102
103 if not opts.text and (util.binary(a) or util.binary(b)):
103 if not opts.text and (util.binary(a) or util.binary(b)):
104 if a and b and len(a) == len(b) and a == b:
104 if a and b and len(a) == len(b) and a == b:
105 return ""
105 return ""
106 l = ['Binary file %s has changed\n' % fn1]
106 l = ['Binary file %s has changed\n' % fn1]
107 elif not a:
107 elif not a:
108 b = splitnewlines(b)
108 b = splitnewlines(b)
109 if a is None:
109 if a is None:
110 l1 = '--- /dev/null%s' % datetag(epoch, False)
110 l1 = '--- /dev/null%s' % datetag(epoch, False)
111 else:
111 else:
112 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
112 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
113 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
113 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
114 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
114 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
115 l = [l1, l2, l3] + ["+" + e for e in b]
115 l = [l1, l2, l3] + ["+" + e for e in b]
116 elif not b:
116 elif not b:
117 a = splitnewlines(a)
117 a = splitnewlines(a)
118 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
118 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
119 if b is None:
119 if b is None:
120 l2 = '+++ /dev/null%s' % datetag(epoch, False)
120 l2 = '+++ /dev/null%s' % datetag(epoch, False)
121 else:
121 else:
122 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
122 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
123 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
123 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
124 l = [l1, l2, l3] + ["-" + e for e in a]
124 l = [l1, l2, l3] + ["-" + e for e in a]
125 else:
125 else:
126 al = splitnewlines(a)
126 al = splitnewlines(a)
127 bl = splitnewlines(b)
127 bl = splitnewlines(b)
128 l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
128 l = list(_unidiff(a, b, al, bl, opts=opts))
129 if not l:
129 if not l:
130 return ""
130 return ""
131 # difflib uses a space, rather than a tab
131
132 l[0] = "%s%s" % (l[0][:-2], datetag(ad))
132 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
133 l[1] = "%s%s" % (l[1][:-2], datetag(bd))
133 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
134
134
135 for ln in xrange(len(l)):
135 for ln in xrange(len(l)):
136 if l[ln][-1] != '\n':
136 if l[ln][-1] != '\n':
137 l[ln] += "\n\ No newline at end of file\n"
137 l[ln] += "\n\ No newline at end of file\n"
138
138
139 if r:
139 if r:
140 l.insert(0, diffline(r, fn1, fn2, opts))
140 l.insert(0, diffline(r, fn1, fn2, opts))
141
141
142 return "".join(l)
142 return "".join(l)
143
143
144 # somewhat self contained replacement for difflib.unified_diff
144 # creates a headerless unified diff
145 # t1 and t2 are the text to be diffed
145 # t1 and t2 are the text to be diffed
146 # l1 and l2 are the text broken up into lines
146 # l1 and l2 are the text broken up into lines
147 # header1 and header2 are the filenames for the diff output
147 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
148 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
149 def contextend(l, len):
148 def contextend(l, len):
150 ret = l + opts.context
149 ret = l + opts.context
151 if ret > len:
150 if ret > len:
152 ret = len
151 ret = len
153 return ret
152 return ret
154
153
155 def contextstart(l):
154 def contextstart(l):
156 ret = l - opts.context
155 ret = l - opts.context
157 if ret < 0:
156 if ret < 0:
158 return 0
157 return 0
159 return ret
158 return ret
160
159
161 def yieldhunk(hunk, header):
160 def yieldhunk(hunk):
162 if header:
163 for x in header:
164 yield x
165 (astart, a2, bstart, b2, delta) = hunk
161 (astart, a2, bstart, b2, delta) = hunk
166 aend = contextend(a2, len(l1))
162 aend = contextend(a2, len(l1))
167 alen = aend - astart
163 alen = aend - astart
168 blen = b2 - bstart + aend - a2
164 blen = b2 - bstart + aend - a2
169
165
170 func = ""
166 func = ""
171 if opts.showfunc:
167 if opts.showfunc:
172 # walk backwards from the start of the context
168 # walk backwards from the start of the context
173 # to find a line starting with an alphanumeric char.
169 # to find a line starting with an alphanumeric char.
174 for x in xrange(astart - 1, -1, -1):
170 for x in xrange(astart - 1, -1, -1):
175 t = l1[x].rstrip()
171 t = l1[x].rstrip()
176 if funcre.match(t):
172 if funcre.match(t):
177 func = ' ' + t[:40]
173 func = ' ' + t[:40]
178 break
174 break
179
175
180 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
176 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
181 bstart + 1, blen, func)
177 bstart + 1, blen, func)
182 for x in delta:
178 for x in delta:
183 yield x
179 yield x
184 for x in xrange(a2, aend):
180 for x in xrange(a2, aend):
185 yield ' ' + l1[x]
181 yield ' ' + l1[x]
186
182
187 header = ["--- %s\t\n" % header1, "+++ %s\t\n" % header2]
188
189 if opts.showfunc:
183 if opts.showfunc:
190 funcre = re.compile('\w')
184 funcre = re.compile('\w')
191
185
192 # bdiff.blocks gives us the matching sequences in the files. The loop
186 # bdiff.blocks gives us the matching sequences in the files. The loop
193 # below finds the spaces between those matching sequences and translates
187 # below finds the spaces between those matching sequences and translates
194 # them into diff output.
188 # them into diff output.
195 #
189 #
196 if opts.ignorews or opts.ignorewsamount:
190 if opts.ignorews or opts.ignorewsamount:
197 t1 = wsclean(opts, t1, False)
191 t1 = wsclean(opts, t1, False)
198 t2 = wsclean(opts, t2, False)
192 t2 = wsclean(opts, t2, False)
199
193
200 diff = bdiff.blocks(t1, t2)
194 diff = bdiff.blocks(t1, t2)
201 hunk = None
195 hunk = None
202 for i, s1 in enumerate(diff):
196 for i, s1 in enumerate(diff):
203 # The first match is special.
197 # The first match is special.
204 # we've either found a match starting at line 0 or a match later
198 # we've either found a match starting at line 0 or a match later
205 # in the file. If it starts later, old and new below will both be
199 # in the file. If it starts later, old and new below will both be
206 # empty and we'll continue to the next match.
200 # empty and we'll continue to the next match.
207 if i > 0:
201 if i > 0:
208 s = diff[i - 1]
202 s = diff[i - 1]
209 else:
203 else:
210 s = [0, 0, 0, 0]
204 s = [0, 0, 0, 0]
211 delta = []
205 delta = []
212 a1 = s[1]
206 a1 = s[1]
213 a2 = s1[0]
207 a2 = s1[0]
214 b1 = s[3]
208 b1 = s[3]
215 b2 = s1[2]
209 b2 = s1[2]
216
210
217 old = l1[a1:a2]
211 old = l1[a1:a2]
218 new = l2[b1:b2]
212 new = l2[b1:b2]
219
213
220 # bdiff sometimes gives huge matches past eof, this check eats them,
214 # bdiff sometimes gives huge matches past eof, this check eats them,
221 # and deals with the special first match case described above
215 # and deals with the special first match case described above
222 if not old and not new:
216 if not old and not new:
223 continue
217 continue
224
218
225 if opts.ignoreblanklines:
219 if opts.ignoreblanklines:
226 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
220 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
227 continue
221 continue
228
222
229 astart = contextstart(a1)
223 astart = contextstart(a1)
230 bstart = contextstart(b1)
224 bstart = contextstart(b1)
231 prev = None
225 prev = None
232 if hunk:
226 if hunk:
233 # join with the previous hunk if it falls inside the context
227 # join with the previous hunk if it falls inside the context
234 if astart < hunk[1] + opts.context + 1:
228 if astart < hunk[1] + opts.context + 1:
235 prev = hunk
229 prev = hunk
236 astart = hunk[1]
230 astart = hunk[1]
237 bstart = hunk[3]
231 bstart = hunk[3]
238 else:
232 else:
239 for x in yieldhunk(hunk, header):
233 for x in yieldhunk(hunk):
240 yield x
234 yield x
241 # we only want to yield the header if the files differ, and
242 # we only want to yield it once.
243 header = None
244 if prev:
235 if prev:
245 # we've joined the previous hunk, record the new ending points.
236 # we've joined the previous hunk, record the new ending points.
246 hunk[1] = a2
237 hunk[1] = a2
247 hunk[3] = b2
238 hunk[3] = b2
248 delta = hunk[4]
239 delta = hunk[4]
249 else:
240 else:
250 # create a new hunk
241 # create a new hunk
251 hunk = [astart, a2, bstart, b2, delta]
242 hunk = [astart, a2, bstart, b2, delta]
252
243
253 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
244 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
254 delta[len(delta):] = ['-' + x for x in old]
245 delta[len(delta):] = ['-' + x for x in old]
255 delta[len(delta):] = ['+' + x for x in new]
246 delta[len(delta):] = ['+' + x for x in new]
256
247
257 if hunk:
248 if hunk:
258 for x in yieldhunk(hunk, header):
249 for x in yieldhunk(hunk):
259 yield x
250 yield x
260
251
261 def patchtext(bin):
252 def patchtext(bin):
262 pos = 0
253 pos = 0
263 t = []
254 t = []
264 while pos < len(bin):
255 while pos < len(bin):
265 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
256 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
266 pos += 12
257 pos += 12
267 t.append(bin[pos:pos + l])
258 t.append(bin[pos:pos + l])
268 pos += l
259 pos += l
269 return "".join(t)
260 return "".join(t)
270
261
271 def patch(a, bin):
262 def patch(a, bin):
272 return mpatch.patches(a, [bin])
263 return mpatch.patches(a, [bin])
273
264
274 # similar to difflib.SequenceMatcher.get_matching_blocks
265 # similar to difflib.SequenceMatcher.get_matching_blocks
275 def get_matching_blocks(a, b):
266 def get_matching_blocks(a, b):
276 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
267 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
277
268
278 def trivialdiffheader(length):
269 def trivialdiffheader(length):
279 return struct.pack(">lll", 0, 0, length)
270 return struct.pack(">lll", 0, 0, length)
280
271
281 patches = mpatch.patches
272 patches = mpatch.patches
282 patchedsize = mpatch.patchedsize
273 patchedsize = mpatch.patchedsize
283 textdiff = bdiff.bdiff
274 textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now