##// END OF EJS Templates
mdiff: speed up showfunc for large diffs...
Brodie Rao -
r15141:16dc9a32 default
parent child Browse files
Show More
@@ -1,277 +1,281
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, util
9 import bdiff, mpatch, util
10 import re, struct
10 import re, struct
11
11
def splitnewlines(text):
    '''Split text on newline characters only, keeping the terminators.

    Unlike str.splitlines, no other line-boundary character is treated
    as a separator. Every returned element ends with a newline except,
    possibly, the last one when text does not end with a newline.
    Empty text yields an empty list.
    '''
    pieces = text.split('\n')
    # Whatever follows the final newline (possibly nothing) is handled
    # separately because it has no terminator of its own.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
21
21
class diffopts(object):
    '''Bundle of the options that control diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Value used for every option the caller leaves unset (or sets to None).
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
    }

    # Only the known option names may be stored on an instance.
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Store each known option, falling back to its default.

        An option passed as None is treated as "not given". Keyword
        arguments that are not known option names are ignored. The
        context option is converted with int(); a value int() rejects
        with ValueError is reported through util.Abort.
        '''
        for name in self.__slots__:
            value = opts.get(name)
            if value is None:
                value = self.defaults[name]
            setattr(self, name, value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with this object's values, overridden
        by any keyword arguments given.'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)
65
65
# Shared diffopts instance holding only the default values; used as the
# default "opts" argument of the diff functions in this module.
defaultopts = diffopts()
67
67
def wsclean(opts, text, blank=True):
    '''Return text with whitespace normalized according to opts.

    opts  -- object with the boolean attributes ignorews,
             ignorewsamount and ignoreblanklines
    text  -- the string to normalize
    blank -- when false, blank lines are left alone even if
             opts.ignoreblanklines is set

    With ignorews, every run of spaces, tabs and carriage returns is
    removed. Otherwise, with ignorewsamount, each such run becomes a
    single space and a space directly before a newline is dropped.
    With blank and ignoreblanklines, blank lines are removed.
    '''
    if opts.ignorews:
        text = re.sub('[ \t\r]+', '', text)
    elif opts.ignorewsamount:
        text = re.sub('[ \t\r]+', ' ', text)
        text = text.replace(' \n', '\n')
    if blank and opts.ignoreblanklines:
        # Collapse each run of newlines to a single newline and trim the
        # ends, instead of deleting every newline: deleting them all
        # would glue separate lines together, so that "a\nb" and "ab"
        # compared equal and a real change looked like a blank-line-only
        # one.
        text = re.sub('\n+', '\n', text).strip('\n')
    return text
77
77
def diffline(revs, a, b, opts):
    '''Build the "diff ..." command line that introduces a file's diff.

    revs -- iterable of revision identifiers; each is rendered as
            "-r REV" (plain format only)
    a, b -- old and new file names
    opts -- object whose "git" attribute selects the header format

    In git mode the line is "diff --git a/A b/B"; otherwise it is
    "diff [-r REV ...] A". The result always ends with a newline.
    '''
    if opts.git:
        words = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
    else:
        words = ['diff']
        if revs:
            words.append(' '.join(["-r %s" % rev for rev in revs]))
        # the plain header names only the old file
        words.append(a)
    return ' '.join(words) + '\n'
90
90
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return a unified diff between two versions of a file as a string.

    a, b     -- old and new file contents; None means the file does not
                exist on that side and is shown as /dev/null
    ad, bd   -- date strings placed in the "---" / "+++" header lines
    fn1, fn2 -- old and new file names
    r        -- revisions handed to diffline(); when empty, no "diff ..."
                line is emitted
    opts     -- diffopts instance controlling the output

    Returns "" when both sides are empty or no difference is produced.
    When util.binary() flags either side and opts.text is not set, only
    a "Binary file ... has changed" line is produced.
    '''
    def datetag(date, fn=None):
        # Text that follows a file name in a header line: either the
        # date, or (when dates are omitted) a lone tab if that file name
        # contains a space, so the end of the name stays unambiguous.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # Old side empty or missing: every line of b is an addition.
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        # The tab decision for each header uses that header's own file
        # name; basing both on fn1 mishandled renames where only one of
        # the two names contains a space.
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # New side empty or missing: every line of a is a removal.
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- a/%s%s" % (fn1, datetag(ad, fn1)))
        l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd, fn2)))

    # A line lacking its newline can only be a file's unterminated last
    # line; flag it the way unified diffs conventionally do.
    for ln, line in enumerate(l):
        if not line.endswith('\n'):
            l[ln] = line + "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
143
143
# creates a headerless unified diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    '''Generate the hunks of a unified diff, without the file headers.

    Yields, for each hunk, an "@@ ... @@" range line followed by the
    hunk's context (" "), removed ("-") and added ("+") lines. Changes
    whose context regions touch are merged into a single hunk.
    '''
    def contextend(pos, limit):
        # last context line after a change, clamped to the file length
        return min(pos + opts.context, limit)

    def contextstart(pos):
        # first context line before a change, clamped to the file start
        return max(pos - opts.context, 0)

    # [line index where the next search may stop, last function text found]
    # kept in a list so the nested generator can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for idx in xrange(astart - 1, lastpos - 1, -1):
                candidate = l1[idx]
                if candidate[0].isalnum():
                    func = ' ' + candidate.rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for entry in delta:
            yield entry
        # trailing context after the last change of the hunk
        for idx in xrange(a2, aend):
            yield ' ' + l1[idx]

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    if opts.ignorews or opts.ignorewsamount:
        t1 = wsclean(opts, t1, False)
        t2 = wsclean(opts, t2, False)

    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i, match in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            prevmatch = diff[i - 1]
        else:
            prevmatch = [0, 0, 0, 0]
        delta = []
        # the changed region lies between the end of the previous match
        # and the start of this one, on each side
        a1 = prevmatch[1]
        a2 = match[0]
        b1 = prevmatch[3]
        b2 = match[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for entry in yieldhunk(hunk):
                    yield entry
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend([' ' + line for line in l1[astart:a1]])
        delta.extend(['-' + line for line in old])
        delta.extend(['+' + line for line in new])

    # flush the hunk still pending after the last change
    if hunk:
        for entry in yieldhunk(hunk):
            yield entry
251
255
def patchtext(bin):
    '''Return the concatenated data of every fragment in a binary delta.

    The delta is read as a sequence of fragments, each a 12-byte header
    of three big-endian signed 32-bit integers followed by the number
    of data bytes given by the third integer. The first two header
    fields are not used here.
    '''
    fragments = []
    offset = 0
    total = len(bin)
    while offset < total:
        header = bin[offset:offset + 12]
        _first, _second, size = struct.unpack(">lll", header)
        offset += 12
        fragments.append(bin[offset:offset + size])
        offset += size
    return "".join(fragments)
261
265
def patch(a, bin):
    '''Apply the binary delta bin to the text a and return the result.'''
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # With an empty base, the result is taken to be the delta's payload:
    # skip over the trivial 12-byte delta header without copying.
    return buffer(bin, 12)
267
271
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return bdiff's matching blocks of a and b as a list of
    (start in a, start in b, length) tuples.'''
    triples = []
    for block in bdiff.blocks(a, b):
        astart, aend, bstart = block[0], block[1], block[2]
        triples.append((astart, bstart, aend - astart))
    return triples
271
275
def trivialdiffheader(length):
    '''Return the 12-byte delta header (0, 0, length), packed as three
    big-endian signed 32-bit integers.'''
    first = second = 0
    return struct.pack(">lll", first, second, length)
274
278
# Re-export the C extension entry points under this module's namespace.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
General Comments 0
You need to be logged in to leave comments. Login now