##// END OF EJS Templates
mdiff: speed up showfunc for large diffs...
Brodie Rao -
r15141:16dc9a32 default
parent child Browse files
Show More
@@ -1,277 +1,281 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, util
10 10 import re, struct
11 11
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping the terminators.

    Unlike str.splitlines, other line-boundary characters (such as '\\r'
    on its own) do not split. Every returned line ends in '\\n' except
    possibly the last, which is whatever follows the final newline; an
    empty trailing piece is dropped.
    '''
    pieces = text.split('\n')
    # str.split always returns at least one piece; the final one is the
    # text after the last newline (empty when text ends with '\n').
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
21 21
class diffopts(object):
    '''Container for the options that control diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # value used for every option that is not supplied (or supplied as None)
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        }

    # instances carry exactly the known options and nothing else
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Populate every known option from opts, falling back to defaults.

        Keyword arguments that are not known options are ignored. A value
        of None counts as "not given". context is coerced with int();
        util.Abort is raised when that conversion raises ValueError.
        '''
        for name in self.__slots__:
            value = opts.get(name)
            if value is None:
                value = self.defaults[name]
            setattr(self, name, value)

        try:
            self.context = int(self.context)
        except ValueError:
            message = _('diff context lines count must be '
                        'an integer, not %r')
            raise util.Abort(message % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts equal to this one, with kwargs overriding.'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)

# shared instance holding the default value of every option
defaultopts = diffopts()
67 67
def wsclean(opts, text, blank=True):
    '''Return text normalised according to the whitespace options in opts.

    opts.ignorews removes every run of spaces, tabs and carriage returns.
    Otherwise opts.ignorewsamount collapses each such run to one space and
    then drops a space that directly precedes a newline. When blank is
    true and opts.ignoreblanklines is set, all newlines are removed from
    the result as well.
    '''
    if opts.ignorews:
        cleaned = re.sub('[ \t\r]+', '', text)
    elif opts.ignorewsamount:
        collapsed = re.sub('[ \t\r]+', ' ', text)
        cleaned = collapsed.replace(' \n', '\n')
    else:
        cleaned = text

    if not (blank and opts.ignoreblanklines):
        return cleaned
    return re.sub('\n+', '', cleaned)
77 77
def diffline(revs, a, b, opts):
    '''Build the "diff ..." command line that introduces a file's diff.

    With opts.git the result is "diff --git a/<a> b/<b>" and revs is not
    used. Otherwise it is "diff", one "-r <rev>" per entry of revs (when
    revs is non-empty), then <a>; b does not appear. The returned string
    ends with a newline.
    '''
    if opts.git:
        words = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
    else:
        words = ['diff']
        if revs:
            words.append(' '.join(["-r %s" % rev for rev in revs]))
        words.append(a)
    return ' '.join(words) + '\n'
90 90
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return the unified diff between two versions of a file as a string.

    a, b     -- old and new file contents; None selects /dev/null for that
                side of the header, any other empty value an empty file
    ad, bd   -- date strings placed after the old and new file names
    fn1, fn2 -- old and new file names
    r        -- optional revisions; when set, a "diff ..." line built by
                diffline() is put in front of the output
    opts     -- diffopts instance controlling the output

    An empty string is returned when both sides are empty, when two
    binary contents are equal, or when the line diff yields no hunks.
    '''
    def datetag(date, fn=None):
        # Text that follows a file name in a "---"/"+++" header line.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        # Without a date, a name containing spaces still needs a tab so
        # the end of the name stays unambiguous. The decision is made
        # from the name actually written on this header line.
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # old side is empty or missing: everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # new side is empty or missing: everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- a/%s%s" % (fn1, datetag(ad, fn1)))
        l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd, fn2)))

    # A line lacking its terminator can only come from the end of a file;
    # flag it the way diff does.
    marker = "\n\\ No newline at end of file\n"
    for pos, line in enumerate(l):
        if not line.endswith('\n'):
            l[pos] = line + marker

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
143 143
# creates a headerless unified diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    '''Generate the lines of a unified diff, without the file header.

    t1, t2 -- old and new text, handed to bdiff.blocks (after whitespace
              cleaning when opts.ignorews or opts.ignorewsamount is set)
    l1, l2 -- the same texts split into lines; output is built from these
    opts   -- diffopts instance (context, showfunc and whitespace options)

    Yields strings: an "@@ ... @@" line per hunk followed by that hunk's
    context (' '), removed ('-') and added ('+') lines.
    '''
    def contextend(l, length):
        # end of the trailing context, clamped to the number of lines
        return min(l + opts.context, length)

    def contextstart(l):
        # start of the leading context, clamped to the first line
        return max(l - opts.context, 0)

    # [line at which the next backwards search stops, last function text]
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context after the last change of the hunk
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    if opts.ignorews or opts.ignorewsamount:
        t1 = wsclean(opts, t1, False)
        t2 = wsclean(opts, t2, False)

    # each block is indexed below as (a-start, a-end, b-start, b-end)
    diff = bdiff.blocks(t1, t2)
    # pending hunk: [astart, a2, bstart, b2, delta lines]
    hunk = None
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        # the changed region lies between the end of the previous match
        # and the start of this one
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # delta may be the joined hunk's own list, so grow it in place
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk):
            yield x
251 255
def patchtext(bin):
    '''Return the concatenated data of every fragment in a binary patch.

    bin is read as a sequence of records, each a 12-byte header of three
    big-endian 32-bit signed integers followed by as many data bytes as
    the third integer says. Only the data parts are returned.
    '''
    fragments = []
    offset = 0
    total = len(bin)
    while offset < total:
        header = bin[offset:offset + 12]
        # the first two header fields are not needed to extract the data
        _first, _second, size = struct.unpack(">lll", header)
        body = offset + 12
        fragments.append(bin[body:body + size])
        offset = body + size
    return "".join(fragments)
261 265
def patch(a, bin):
    '''Apply the binary patch bin to the text a and return the result.

    When a is empty the result is bin without its first 12 bytes (the
    trivial delta header), returned as a buffer; otherwise the work is
    delegated to mpatch.patches.
    '''
    if len(a) > 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return buffer(bin, 12)
267 271
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return bdiff's matching blocks as (a-start, b-start, length) tuples.

    Each block from bdiff.blocks(a, b) is converted from its first three
    fields; the length is the second field minus the first.
    '''
    triples = []
    for blk in bdiff.blocks(a, b):
        astart, aend, bstart = blk[0], blk[1], blk[2]
        triples.append((astart, bstart, aend - astart))
    return triples
271 275
def trivialdiffheader(length):
    '''Return a 12-byte patch header: two zero fields followed by length.

    All three fields are packed as big-endian 32-bit signed integers.
    '''
    first = second = 0
    return struct.pack(">lll", first, second, length)
274 278
# Re-export the bdiff/mpatch entry points under this module's names so
# callers can use them without importing those modules directly.
textdiff = bdiff.bdiff
patchedsize = mpatch.patchedsize
patches = mpatch.patches
General Comments 0
You need to be logged in to leave comments. Login now