##// END OF EJS Templates
mdiff: compare content of binary files directly...
Martin Geisler -
r6871:13fe85fe default
parent child Browse files
Show More
@@ -1,260 +1,257
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, re, struct, util
9 import bdiff, mpatch, re, struct, util
10
10
11 def splitnewlines(text):
11 def splitnewlines(text):
12 '''like str.splitlines, but only split on newlines.'''
12 '''like str.splitlines, but only split on newlines.'''
13 lines = [l + '\n' for l in text.split('\n')]
13 lines = [l + '\n' for l in text.split('\n')]
14 if lines:
14 if lines:
15 if lines[-1] == '\n':
15 if lines[-1] == '\n':
16 lines.pop()
16 lines.pop()
17 else:
17 else:
18 lines[-1] = lines[-1][:-1]
18 lines[-1] = lines[-1][:-1]
19 return lines
19 return lines
20
20
21 class diffopts(object):
21 class diffopts(object):
22 '''context is the number of context lines
22 '''context is the number of context lines
23 text treats all files as text
23 text treats all files as text
24 showfunc enables diff -p output
24 showfunc enables diff -p output
25 git enables the git extended patch format
25 git enables the git extended patch format
26 nodates removes dates from diff headers
26 nodates removes dates from diff headers
27 ignorews ignores all whitespace changes in the diff
27 ignorews ignores all whitespace changes in the diff
28 ignorewsamount ignores changes in the amount of whitespace
28 ignorewsamount ignores changes in the amount of whitespace
29 ignoreblanklines ignores changes whose lines are all blank'''
29 ignoreblanklines ignores changes whose lines are all blank'''
30
30
31 defaults = {
31 defaults = {
32 'context': 3,
32 'context': 3,
33 'text': False,
33 'text': False,
34 'showfunc': False,
34 'showfunc': False,
35 'git': False,
35 'git': False,
36 'nodates': False,
36 'nodates': False,
37 'ignorews': False,
37 'ignorews': False,
38 'ignorewsamount': False,
38 'ignorewsamount': False,
39 'ignoreblanklines': False,
39 'ignoreblanklines': False,
40 }
40 }
41
41
42 __slots__ = defaults.keys()
42 __slots__ = defaults.keys()
43
43
44 def __init__(self, **opts):
44 def __init__(self, **opts):
45 for k in self.__slots__:
45 for k in self.__slots__:
46 v = opts.get(k)
46 v = opts.get(k)
47 if v is None:
47 if v is None:
48 v = self.defaults[k]
48 v = self.defaults[k]
49 setattr(self, k, v)
49 setattr(self, k, v)
50
50
51 try:
51 try:
52 self.context = int(self.context)
52 self.context = int(self.context)
53 except ValueError:
53 except ValueError:
54 raise util.Abort(_('diff context lines count must be '
54 raise util.Abort(_('diff context lines count must be '
55 'an integer, not %r') % self.context)
55 'an integer, not %r') % self.context)
56
56
57 defaultopts = diffopts()
57 defaultopts = diffopts()
58
58
59 def wsclean(opts, text):
59 def wsclean(opts, text):
60 if opts.ignorews:
60 if opts.ignorews:
61 text = re.sub('[ \t]+', '', text)
61 text = re.sub('[ \t]+', '', text)
62 elif opts.ignorewsamount:
62 elif opts.ignorewsamount:
63 text = re.sub('[ \t]+', ' ', text)
63 text = re.sub('[ \t]+', ' ', text)
64 text = re.sub('[ \t]+\n', '\n', text)
64 text = re.sub('[ \t]+\n', '\n', text)
65 if opts.ignoreblanklines:
65 if opts.ignoreblanklines:
66 text = re.sub('\n+', '', text)
66 text = re.sub('\n+', '', text)
67 return text
67 return text
68
68
69 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
69 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
70 def datetag(date, addtab=True):
70 def datetag(date, addtab=True):
71 if not opts.git and not opts.nodates:
71 if not opts.git and not opts.nodates:
72 return '\t%s\n' % date
72 return '\t%s\n' % date
73 if addtab and ' ' in fn1:
73 if addtab and ' ' in fn1:
74 return '\t\n'
74 return '\t\n'
75 return '\n'
75 return '\n'
76
76
77 if not a and not b: return ""
77 if not a and not b: return ""
78 epoch = util.datestr((0, 0))
78 epoch = util.datestr((0, 0))
79
79
80 if not opts.text and (util.binary(a) or util.binary(b)):
80 if not opts.text and (util.binary(a) or util.binary(b)):
81 def h(v):
81 if a and b and len(a) == len(b) and a == b:
82 # md5 is used instead of sha1 because md5 is supposedly faster
83 return util.md5(v).digest()
84 if a and b and len(a) == len(b) and h(a) == h(b):
85 return ""
82 return ""
86 l = ['Binary file %s has changed\n' % fn1]
83 l = ['Binary file %s has changed\n' % fn1]
87 elif not a:
84 elif not a:
88 b = splitnewlines(b)
85 b = splitnewlines(b)
89 if a is None:
86 if a is None:
90 l1 = '--- /dev/null%s' % datetag(epoch, False)
87 l1 = '--- /dev/null%s' % datetag(epoch, False)
91 else:
88 else:
92 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
89 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
93 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
90 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
94 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
91 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
95 l = [l1, l2, l3] + ["+" + e for e in b]
92 l = [l1, l2, l3] + ["+" + e for e in b]
96 elif not b:
93 elif not b:
97 a = splitnewlines(a)
94 a = splitnewlines(a)
98 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
95 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
99 if b is None:
96 if b is None:
100 l2 = '+++ /dev/null%s' % datetag(epoch, False)
97 l2 = '+++ /dev/null%s' % datetag(epoch, False)
101 else:
98 else:
102 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
99 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
103 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
100 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
104 l = [l1, l2, l3] + ["-" + e for e in a]
101 l = [l1, l2, l3] + ["-" + e for e in a]
105 else:
102 else:
106 al = splitnewlines(a)
103 al = splitnewlines(a)
107 bl = splitnewlines(b)
104 bl = splitnewlines(b)
108 l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
105 l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
109 if not l: return ""
106 if not l: return ""
110 # difflib uses a space, rather than a tab
107 # difflib uses a space, rather than a tab
111 l[0] = "%s%s" % (l[0][:-2], datetag(ad))
108 l[0] = "%s%s" % (l[0][:-2], datetag(ad))
112 l[1] = "%s%s" % (l[1][:-2], datetag(bd))
109 l[1] = "%s%s" % (l[1][:-2], datetag(bd))
113
110
114 for ln in xrange(len(l)):
111 for ln in xrange(len(l)):
115 if l[ln][-1] != '\n':
112 if l[ln][-1] != '\n':
116 l[ln] += "\n\ No newline at end of file\n"
113 l[ln] += "\n\ No newline at end of file\n"
117
114
118 if r:
115 if r:
119 l.insert(0, "diff %s %s\n" %
116 l.insert(0, "diff %s %s\n" %
120 (' '.join(["-r %s" % rev for rev in r]), fn1))
117 (' '.join(["-r %s" % rev for rev in r]), fn1))
121
118
122 return "".join(l)
119 return "".join(l)
123
120
124 # somewhat self contained replacement for difflib.unified_diff
121 # somewhat self contained replacement for difflib.unified_diff
125 # t1 and t2 are the text to be diffed
122 # t1 and t2 are the text to be diffed
126 # l1 and l2 are the text broken up into lines
123 # l1 and l2 are the text broken up into lines
127 # header1 and header2 are the filenames for the diff output
124 # header1 and header2 are the filenames for the diff output
128 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
125 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
129 def contextend(l, len):
126 def contextend(l, len):
130 ret = l + opts.context
127 ret = l + opts.context
131 if ret > len:
128 if ret > len:
132 ret = len
129 ret = len
133 return ret
130 return ret
134
131
135 def contextstart(l):
132 def contextstart(l):
136 ret = l - opts.context
133 ret = l - opts.context
137 if ret < 0:
134 if ret < 0:
138 return 0
135 return 0
139 return ret
136 return ret
140
137
141 def yieldhunk(hunk, header):
138 def yieldhunk(hunk, header):
142 if header:
139 if header:
143 for x in header:
140 for x in header:
144 yield x
141 yield x
145 (astart, a2, bstart, b2, delta) = hunk
142 (astart, a2, bstart, b2, delta) = hunk
146 aend = contextend(a2, len(l1))
143 aend = contextend(a2, len(l1))
147 alen = aend - astart
144 alen = aend - astart
148 blen = b2 - bstart + aend - a2
145 blen = b2 - bstart + aend - a2
149
146
150 func = ""
147 func = ""
151 if opts.showfunc:
148 if opts.showfunc:
152 # walk backwards from the start of the context
149 # walk backwards from the start of the context
153 # to find a line starting with an alphanumeric char.
150 # to find a line starting with an alphanumeric char.
154 for x in xrange(astart, -1, -1):
151 for x in xrange(astart, -1, -1):
155 t = l1[x].rstrip()
152 t = l1[x].rstrip()
156 if funcre.match(t):
153 if funcre.match(t):
157 func = ' ' + t[:40]
154 func = ' ' + t[:40]
158 break
155 break
159
156
160 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
157 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
161 bstart + 1, blen, func)
158 bstart + 1, blen, func)
162 for x in delta:
159 for x in delta:
163 yield x
160 yield x
164 for x in xrange(a2, aend):
161 for x in xrange(a2, aend):
165 yield ' ' + l1[x]
162 yield ' ' + l1[x]
166
163
167 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
164 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
168
165
169 if opts.showfunc:
166 if opts.showfunc:
170 funcre = re.compile('\w')
167 funcre = re.compile('\w')
171
168
172 # bdiff.blocks gives us the matching sequences in the files. The loop
169 # bdiff.blocks gives us the matching sequences in the files. The loop
173 # below finds the spaces between those matching sequences and translates
170 # below finds the spaces between those matching sequences and translates
174 # them into diff output.
171 # them into diff output.
175 #
172 #
176 diff = bdiff.blocks(t1, t2)
173 diff = bdiff.blocks(t1, t2)
177 hunk = None
174 hunk = None
178 for i in xrange(len(diff)):
175 for i in xrange(len(diff)):
179 # The first match is special.
176 # The first match is special.
180 # we've either found a match starting at line 0 or a match later
177 # we've either found a match starting at line 0 or a match later
181 # in the file. If it starts later, old and new below will both be
178 # in the file. If it starts later, old and new below will both be
182 # empty and we'll continue to the next match.
179 # empty and we'll continue to the next match.
183 if i > 0:
180 if i > 0:
184 s = diff[i-1]
181 s = diff[i-1]
185 else:
182 else:
186 s = [0, 0, 0, 0]
183 s = [0, 0, 0, 0]
187 delta = []
184 delta = []
188 s1 = diff[i]
185 s1 = diff[i]
189 a1 = s[1]
186 a1 = s[1]
190 a2 = s1[0]
187 a2 = s1[0]
191 b1 = s[3]
188 b1 = s[3]
192 b2 = s1[2]
189 b2 = s1[2]
193
190
194 old = l1[a1:a2]
191 old = l1[a1:a2]
195 new = l2[b1:b2]
192 new = l2[b1:b2]
196
193
197 # bdiff sometimes gives huge matches past eof, this check eats them,
194 # bdiff sometimes gives huge matches past eof, this check eats them,
198 # and deals with the special first match case described above
195 # and deals with the special first match case described above
199 if not old and not new:
196 if not old and not new:
200 continue
197 continue
201
198
202 if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
199 if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
203 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
200 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
204 continue
201 continue
205
202
206 astart = contextstart(a1)
203 astart = contextstart(a1)
207 bstart = contextstart(b1)
204 bstart = contextstart(b1)
208 prev = None
205 prev = None
209 if hunk:
206 if hunk:
210 # join with the previous hunk if it falls inside the context
207 # join with the previous hunk if it falls inside the context
211 if astart < hunk[1] + opts.context + 1:
208 if astart < hunk[1] + opts.context + 1:
212 prev = hunk
209 prev = hunk
213 astart = hunk[1]
210 astart = hunk[1]
214 bstart = hunk[3]
211 bstart = hunk[3]
215 else:
212 else:
216 for x in yieldhunk(hunk, header):
213 for x in yieldhunk(hunk, header):
217 yield x
214 yield x
218 # we only want to yield the header if the files differ, and
215 # we only want to yield the header if the files differ, and
219 # we only want to yield it once.
216 # we only want to yield it once.
220 header = None
217 header = None
221 if prev:
218 if prev:
222 # we've joined the previous hunk, record the new ending points.
219 # we've joined the previous hunk, record the new ending points.
223 hunk[1] = a2
220 hunk[1] = a2
224 hunk[3] = b2
221 hunk[3] = b2
225 delta = hunk[4]
222 delta = hunk[4]
226 else:
223 else:
227 # create a new hunk
224 # create a new hunk
228 hunk = [ astart, a2, bstart, b2, delta ]
225 hunk = [ astart, a2, bstart, b2, delta ]
229
226
230 delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
227 delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
231 delta[len(delta):] = [ '-' + x for x in old ]
228 delta[len(delta):] = [ '-' + x for x in old ]
232 delta[len(delta):] = [ '+' + x for x in new ]
229 delta[len(delta):] = [ '+' + x for x in new ]
233
230
234 if hunk:
231 if hunk:
235 for x in yieldhunk(hunk, header):
232 for x in yieldhunk(hunk, header):
236 yield x
233 yield x
237
234
238 def patchtext(bin):
235 def patchtext(bin):
239 pos = 0
236 pos = 0
240 t = []
237 t = []
241 while pos < len(bin):
238 while pos < len(bin):
242 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
239 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
243 pos += 12
240 pos += 12
244 t.append(bin[pos:pos + l])
241 t.append(bin[pos:pos + l])
245 pos += l
242 pos += l
246 return "".join(t)
243 return "".join(t)
247
244
248 def patch(a, bin):
245 def patch(a, bin):
249 return mpatch.patches(a, [bin])
246 return mpatch.patches(a, [bin])
250
247
251 # similar to difflib.SequenceMatcher.get_matching_blocks
248 # similar to difflib.SequenceMatcher.get_matching_blocks
252 def get_matching_blocks(a, b):
249 def get_matching_blocks(a, b):
253 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
250 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
254
251
255 def trivialdiffheader(length):
252 def trivialdiffheader(length):
256 return struct.pack(">lll", 0, 0, length)
253 return struct.pack(">lll", 0, 0, length)
257
254
258 patches = mpatch.patches
255 patches = mpatch.patches
259 patchedsize = mpatch.patchedsize
256 patchedsize = mpatch.patchedsize
260 textdiff = bdiff.bdiff
257 textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now