##// END OF EJS Templates
mdiff: carriage return (\r) is also ignorable whitespace
Mads Kiilerich -
r12751:8eb758ea default
parent child Browse files
Show More
@@ -1,277 +1,277
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import bdiff, mpatch, util
10 10 import re, struct
11 11
12 12 def splitnewlines(text):
13 13 '''like str.splitlines, but only split on newlines.'''
14 14 lines = [l + '\n' for l in text.split('\n')]
15 15 if lines:
16 16 if lines[-1] == '\n':
17 17 lines.pop()
18 18 else:
19 19 lines[-1] = lines[-1][:-1]
20 20 return lines
21 21
22 22 class diffopts(object):
23 23 '''context is the number of context lines
24 24 text treats all files as text
25 25 showfunc enables diff -p output
26 26 git enables the git extended patch format
27 27 nodates removes dates from diff headers
28 28 ignorews ignores all whitespace changes in the diff
29 29 ignorewsamount ignores changes in the amount of whitespace
30 30 ignoreblanklines ignores changes whose lines are all blank
31 31 upgrade generates git diffs to avoid data loss
32 32 '''
33 33
34 34 defaults = {
35 35 'context': 3,
36 36 'text': False,
37 37 'showfunc': False,
38 38 'git': False,
39 39 'nodates': False,
40 40 'ignorews': False,
41 41 'ignorewsamount': False,
42 42 'ignoreblanklines': False,
43 43 'upgrade': False,
44 44 }
45 45
46 46 __slots__ = defaults.keys()
47 47
48 48 def __init__(self, **opts):
49 49 for k in self.__slots__:
50 50 v = opts.get(k)
51 51 if v is None:
52 52 v = self.defaults[k]
53 53 setattr(self, k, v)
54 54
55 55 try:
56 56 self.context = int(self.context)
57 57 except ValueError:
58 58 raise util.Abort(_('diff context lines count must be '
59 59 'an integer, not %r') % self.context)
60 60
61 61 def copy(self, **kwargs):
62 62 opts = dict((k, getattr(self, k)) for k in self.defaults)
63 63 opts.update(kwargs)
64 64 return diffopts(**opts)
65 65
66 66 defaultopts = diffopts()
67 67
68 68 def wsclean(opts, text, blank=True):
69 69 if opts.ignorews:
70 text = re.sub('[ \t]+', '', text)
70 text = re.sub('[ \t\r]+', '', text)
71 71 elif opts.ignorewsamount:
72 text = re.sub('[ \t]+', ' ', text)
73 text = re.sub('[ \t]+\n', '\n', text)
72 text = re.sub('[ \t\r]+', ' ', text)
73 text = text.replace(' \n', '\n')
74 74 if blank and opts.ignoreblanklines:
75 75 text = re.sub('\n+', '', text)
76 76 return text
77 77
78 78 def diffline(revs, a, b, opts):
79 79 parts = ['diff']
80 80 if opts.git:
81 81 parts.append('--git')
82 82 if revs and not opts.git:
83 83 parts.append(' '.join(["-r %s" % rev for rev in revs]))
84 84 if opts.git:
85 85 parts.append('a/%s' % a)
86 86 parts.append('b/%s' % b)
87 87 else:
88 88 parts.append(a)
89 89 return ' '.join(parts) + '\n'
90 90
91 91 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
92 92 def datetag(date, addtab=True):
93 93 if not opts.git and not opts.nodates:
94 94 return '\t%s\n' % date
95 95 if addtab and ' ' in fn1:
96 96 return '\t\n'
97 97 return '\n'
98 98
99 99 if not a and not b:
100 100 return ""
101 101 epoch = util.datestr((0, 0))
102 102
103 103 if not opts.text and (util.binary(a) or util.binary(b)):
104 104 if a and b and len(a) == len(b) and a == b:
105 105 return ""
106 106 l = ['Binary file %s has changed\n' % fn1]
107 107 elif not a:
108 108 b = splitnewlines(b)
109 109 if a is None:
110 110 l1 = '--- /dev/null%s' % datetag(epoch, False)
111 111 else:
112 112 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
113 113 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
114 114 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
115 115 l = [l1, l2, l3] + ["+" + e for e in b]
116 116 elif not b:
117 117 a = splitnewlines(a)
118 118 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
119 119 if b is None:
120 120 l2 = '+++ /dev/null%s' % datetag(epoch, False)
121 121 else:
122 122 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
123 123 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
124 124 l = [l1, l2, l3] + ["-" + e for e in a]
125 125 else:
126 126 al = splitnewlines(a)
127 127 bl = splitnewlines(b)
128 128 l = list(_unidiff(a, b, al, bl, opts=opts))
129 129 if not l:
130 130 return ""
131 131
132 132 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
133 133 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
134 134
135 135 for ln in xrange(len(l)):
136 136 if l[ln][-1] != '\n':
137 137 l[ln] += "\n\ No newline at end of file\n"
138 138
139 139 if r:
140 140 l.insert(0, diffline(r, fn1, fn2, opts))
141 141
142 142 return "".join(l)
143 143
144 144 # creates a headerless unified diff
145 145 # t1 and t2 are the text to be diffed
146 146 # l1 and l2 are the text broken up into lines
147 147 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
148 148 def contextend(l, len):
149 149 ret = l + opts.context
150 150 if ret > len:
151 151 ret = len
152 152 return ret
153 153
154 154 def contextstart(l):
155 155 ret = l - opts.context
156 156 if ret < 0:
157 157 return 0
158 158 return ret
159 159
160 160 def yieldhunk(hunk):
161 161 (astart, a2, bstart, b2, delta) = hunk
162 162 aend = contextend(a2, len(l1))
163 163 alen = aend - astart
164 164 blen = b2 - bstart + aend - a2
165 165
166 166 func = ""
167 167 if opts.showfunc:
168 168 # walk backwards from the start of the context
169 169 # to find a line starting with an alphanumeric char.
170 170 for x in xrange(astart - 1, -1, -1):
171 171 t = l1[x].rstrip()
172 172 if funcre.match(t):
173 173 func = ' ' + t[:40]
174 174 break
175 175
176 176 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
177 177 bstart + 1, blen, func)
178 178 for x in delta:
179 179 yield x
180 180 for x in xrange(a2, aend):
181 181 yield ' ' + l1[x]
182 182
183 183 if opts.showfunc:
184 184 funcre = re.compile('\w')
185 185
186 186 # bdiff.blocks gives us the matching sequences in the files. The loop
187 187 # below finds the spaces between those matching sequences and translates
188 188 # them into diff output.
189 189 #
190 190 if opts.ignorews or opts.ignorewsamount:
191 191 t1 = wsclean(opts, t1, False)
192 192 t2 = wsclean(opts, t2, False)
193 193
194 194 diff = bdiff.blocks(t1, t2)
195 195 hunk = None
196 196 for i, s1 in enumerate(diff):
197 197 # The first match is special.
198 198 # we've either found a match starting at line 0 or a match later
199 199 # in the file. If it starts later, old and new below will both be
200 200 # empty and we'll continue to the next match.
201 201 if i > 0:
202 202 s = diff[i - 1]
203 203 else:
204 204 s = [0, 0, 0, 0]
205 205 delta = []
206 206 a1 = s[1]
207 207 a2 = s1[0]
208 208 b1 = s[3]
209 209 b2 = s1[2]
210 210
211 211 old = l1[a1:a2]
212 212 new = l2[b1:b2]
213 213
214 214 # bdiff sometimes gives huge matches past eof, this check eats them,
215 215 # and deals with the special first match case described above
216 216 if not old and not new:
217 217 continue
218 218
219 219 if opts.ignoreblanklines:
220 220 if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
221 221 continue
222 222
223 223 astart = contextstart(a1)
224 224 bstart = contextstart(b1)
225 225 prev = None
226 226 if hunk:
227 227 # join with the previous hunk if it falls inside the context
228 228 if astart < hunk[1] + opts.context + 1:
229 229 prev = hunk
230 230 astart = hunk[1]
231 231 bstart = hunk[3]
232 232 else:
233 233 for x in yieldhunk(hunk):
234 234 yield x
235 235 if prev:
236 236 # we've joined the previous hunk, record the new ending points.
237 237 hunk[1] = a2
238 238 hunk[3] = b2
239 239 delta = hunk[4]
240 240 else:
241 241 # create a new hunk
242 242 hunk = [astart, a2, bstart, b2, delta]
243 243
244 244 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
245 245 delta[len(delta):] = ['-' + x for x in old]
246 246 delta[len(delta):] = ['+' + x for x in new]
247 247
248 248 if hunk:
249 249 for x in yieldhunk(hunk):
250 250 yield x
251 251
252 252 def patchtext(bin):
253 253 pos = 0
254 254 t = []
255 255 while pos < len(bin):
256 256 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
257 257 pos += 12
258 258 t.append(bin[pos:pos + l])
259 259 pos += l
260 260 return "".join(t)
261 261
262 262 def patch(a, bin):
263 263 if len(a) == 0:
264 264 # skip over trivial delta header
265 265 return buffer(bin, 12)
266 266 return mpatch.patches(a, [bin])
267 267
268 268 # similar to difflib.SequenceMatcher.get_matching_blocks
269 269 def get_matching_blocks(a, b):
270 270 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
271 271
272 272 def trivialdiffheader(length):
273 273 return struct.pack(">lll", 0, 0, length)
274 274
275 275 patches = mpatch.patches
276 276 patchedsize = mpatch.patchedsize
277 277 textdiff = bdiff.bdiff
@@ -1,392 +1,446
1 1 GNU diff is the reference for all of these results.
2 2
3 3 Prepare tests:
4 4
5 5 $ echo '[alias]' >> $HGRCPATH
6 6 $ echo 'ndiff = diff --nodates' >> $HGRCPATH
7 7
8 8 $ hg init
9 9 $ printf 'hello world\ngoodbye world\n' >foo
10 10 $ hg ci -Amfoo -ufoo
11 11 adding foo
12 12
13 13
14 14 Test added blank lines:
15 15
16 16 $ printf '\nhello world\n\ngoodbye world\n\n' >foo
17 17
18 18 >>> two diffs showing three added lines <<<
19 19
20 20 $ hg ndiff
21 21 diff -r 540c40a65b78 foo
22 22 --- a/foo
23 23 +++ b/foo
24 24 @@ -1,2 +1,5 @@
25 25 +
26 26 hello world
27 27 +
28 28 goodbye world
29 29 +
30 30 $ hg ndiff -b
31 31 diff -r 540c40a65b78 foo
32 32 --- a/foo
33 33 +++ b/foo
34 34 @@ -1,2 +1,5 @@
35 35 +
36 36 hello world
37 37 +
38 38 goodbye world
39 39 +
40 40
41 41 >>> no diffs <<<
42 42
43 43 $ hg ndiff -B
44 44 $ hg ndiff -Bb
45 45
46 46
47 47 Test added horizontal space first on a line:
48 48
49 49 $ printf '\t hello world\ngoodbye world\n' >foo
50 50
51 51 >>> four diffs showing added space first on the first line <<<
52 52
53 53 $ hg ndiff
54 54 diff -r 540c40a65b78 foo
55 55 --- a/foo
56 56 +++ b/foo
57 57 @@ -1,2 +1,2 @@
58 58 -hello world
59 59 + hello world
60 60 goodbye world
61 61
62 62 $ hg ndiff -b
63 63 diff -r 540c40a65b78 foo
64 64 --- a/foo
65 65 +++ b/foo
66 66 @@ -1,2 +1,2 @@
67 67 -hello world
68 68 + hello world
69 69 goodbye world
70 70
71 71 $ hg ndiff -B
72 72 diff -r 540c40a65b78 foo
73 73 --- a/foo
74 74 +++ b/foo
75 75 @@ -1,2 +1,2 @@
76 76 -hello world
77 77 + hello world
78 78 goodbye world
79 79
80 80 $ hg ndiff -Bb
81 81 diff -r 540c40a65b78 foo
82 82 --- a/foo
83 83 +++ b/foo
84 84 @@ -1,2 +1,2 @@
85 85 -hello world
86 86 + hello world
87 87 goodbye world
88 88
89 89
90 90 Test added horizontal space last on a line:
91 91
92 92 $ printf 'hello world\t \ngoodbye world\n' >foo
93 93
94 94 >>> two diffs showing space appended to the first line <<<
95 95
96 96 $ hg ndiff
97 97 diff -r 540c40a65b78 foo
98 98 --- a/foo
99 99 +++ b/foo
100 100 @@ -1,2 +1,2 @@
101 101 -hello world
102 102 +hello world
103 103 goodbye world
104 104
105 105 $ hg ndiff -B
106 106 diff -r 540c40a65b78 foo
107 107 --- a/foo
108 108 +++ b/foo
109 109 @@ -1,2 +1,2 @@
110 110 -hello world
111 111 +hello world
112 112 goodbye world
113 113
114 114 >>> no diffs <<<
115 115
116 116 $ hg ndiff -b
117 117 $ hg ndiff -Bb
118 118
119 119
120 120 Test added horizontal space in the middle of a word:
121 121
122 122 $ printf 'hello world\ngood bye world\n' >foo
123 123
124 124 >>> four diffs showing space inserted into "goodbye" <<<
125 125
126 126 $ hg ndiff
127 127 diff -r 540c40a65b78 foo
128 128 --- a/foo
129 129 +++ b/foo
130 130 @@ -1,2 +1,2 @@
131 131 hello world
132 132 -goodbye world
133 133 +good bye world
134 134
135 135 $ hg ndiff -B
136 136 diff -r 540c40a65b78 foo
137 137 --- a/foo
138 138 +++ b/foo
139 139 @@ -1,2 +1,2 @@
140 140 hello world
141 141 -goodbye world
142 142 +good bye world
143 143
144 144 $ hg ndiff -b
145 145 diff -r 540c40a65b78 foo
146 146 --- a/foo
147 147 +++ b/foo
148 148 @@ -1,2 +1,2 @@
149 149 hello world
150 150 -goodbye world
151 151 +good bye world
152 152
153 153 $ hg ndiff -Bb
154 154 diff -r 540c40a65b78 foo
155 155 --- a/foo
156 156 +++ b/foo
157 157 @@ -1,2 +1,2 @@
158 158 hello world
159 159 -goodbye world
160 160 +good bye world
161 161
162 162
163 163 Test increased horizontal whitespace amount:
164 164
165 165 $ printf 'hello world\ngoodbye\t\t \tworld\n' >foo
166 166
167 167 >>> two diffs showing changed whitespace amount in the last line <<<
168 168
169 169 $ hg ndiff
170 170 diff -r 540c40a65b78 foo
171 171 --- a/foo
172 172 +++ b/foo
173 173 @@ -1,2 +1,2 @@
174 174 hello world
175 175 -goodbye world
176 176 +goodbye world
177 177
178 178 $ hg ndiff -B
179 179 diff -r 540c40a65b78 foo
180 180 --- a/foo
181 181 +++ b/foo
182 182 @@ -1,2 +1,2 @@
183 183 hello world
184 184 -goodbye world
185 185 +goodbye world
186 186
187 187 >>> no diffs <<<
188 188
189 189 $ hg ndiff -b
190 190 $ hg ndiff -Bb
191 191
192 192
193 193 Test added blank line with horizontal whitespace:
194 194
195 195 $ printf 'hello world\n \t\ngoodbye world\n' >foo
196 196
197 197 >>> three diffs showing added blank line with horizontal space <<<
198 198
199 199 $ hg ndiff
200 200 diff -r 540c40a65b78 foo
201 201 --- a/foo
202 202 +++ b/foo
203 203 @@ -1,2 +1,3 @@
204 204 hello world
205 205 +
206 206 goodbye world
207 207
208 208 $ hg ndiff -B
209 209 diff -r 540c40a65b78 foo
210 210 --- a/foo
211 211 +++ b/foo
212 212 @@ -1,2 +1,3 @@
213 213 hello world
214 214 +
215 215 goodbye world
216 216
217 217 $ hg ndiff -b
218 218 diff -r 540c40a65b78 foo
219 219 --- a/foo
220 220 +++ b/foo
221 221 @@ -1,2 +1,3 @@
222 222 hello world
223 223 +
224 224 goodbye world
225 225
226 226 >>> no diffs <<<
227 227
228 228 $ hg ndiff -Bb
229 229
230 230
231 231 Test added blank line with other whitespace:
232 232
233 233 $ printf 'hello world\n \t\ngoodbye world \n' >foo
234 234
235 235 >>> three diffs showing added blank line with other space <<<
236 236
237 237 $ hg ndiff
238 238 diff -r 540c40a65b78 foo
239 239 --- a/foo
240 240 +++ b/foo
241 241 @@ -1,2 +1,3 @@
242 242 -hello world
243 243 -goodbye world
244 244 +hello world
245 245 +
246 246 +goodbye world
247 247
248 248 $ hg ndiff -B
249 249 diff -r 540c40a65b78 foo
250 250 --- a/foo
251 251 +++ b/foo
252 252 @@ -1,2 +1,3 @@
253 253 -hello world
254 254 -goodbye world
255 255 +hello world
256 256 +
257 257 +goodbye world
258 258
259 259 $ hg ndiff -b
260 260 diff -r 540c40a65b78 foo
261 261 --- a/foo
262 262 +++ b/foo
263 263 @@ -1,2 +1,3 @@
264 264 hello world
265 265 +
266 266 goodbye world
267 267
268 268 >>> no diffs <<<
269 269
270 270 $ hg ndiff -Bb
271 271
272 272
273 273 Test whitespace changes:
274 274
275 275 $ printf 'helloworld\ngoodbye\tworld \n' >foo
276 276
277 277 >>> four diffs showing changed whitespace <<<
278 278
279 279 $ hg ndiff
280 280 diff -r 540c40a65b78 foo
281 281 --- a/foo
282 282 +++ b/foo
283 283 @@ -1,2 +1,2 @@
284 284 -hello world
285 285 -goodbye world
286 286 +helloworld
287 287 +goodbye world
288 288
289 289 $ hg ndiff -B
290 290 diff -r 540c40a65b78 foo
291 291 --- a/foo
292 292 +++ b/foo
293 293 @@ -1,2 +1,2 @@
294 294 -hello world
295 295 -goodbye world
296 296 +helloworld
297 297 +goodbye world
298 298
299 299 $ hg ndiff -b
300 300 diff -r 540c40a65b78 foo
301 301 --- a/foo
302 302 +++ b/foo
303 303 @@ -1,2 +1,2 @@
304 304 -hello world
305 305 +helloworld
306 306 goodbye world
307 307
308 308 $ hg ndiff -Bb
309 309 diff -r 540c40a65b78 foo
310 310 --- a/foo
311 311 +++ b/foo
312 312 @@ -1,2 +1,2 @@
313 313 -hello world
314 314 +helloworld
315 315 goodbye world
316 316
317 317 >>> no diffs <<<
318 318
319 319 $ hg ndiff -w
320 320
321 321
322 322 Test whitespace changes and blank lines:
323 323
324 324 $ printf 'helloworld\n\n\n\ngoodbye\tworld \n' >foo
325 325
326 326 >>> five diffs showing changed whitespace <<<
327 327
328 328 $ hg ndiff
329 329 diff -r 540c40a65b78 foo
330 330 --- a/foo
331 331 +++ b/foo
332 332 @@ -1,2 +1,5 @@
333 333 -hello world
334 334 -goodbye world
335 335 +helloworld
336 336 +
337 337 +
338 338 +
339 339 +goodbye world
340 340
341 341 $ hg ndiff -B
342 342 diff -r 540c40a65b78 foo
343 343 --- a/foo
344 344 +++ b/foo
345 345 @@ -1,2 +1,5 @@
346 346 -hello world
347 347 -goodbye world
348 348 +helloworld
349 349 +
350 350 +
351 351 +
352 352 +goodbye world
353 353
354 354 $ hg ndiff -b
355 355 diff -r 540c40a65b78 foo
356 356 --- a/foo
357 357 +++ b/foo
358 358 @@ -1,2 +1,5 @@
359 359 -hello world
360 360 +helloworld
361 361 +
362 362 +
363 363 +
364 364 goodbye world
365 365
366 366 $ hg ndiff -Bb
367 367 diff -r 540c40a65b78 foo
368 368 --- a/foo
369 369 +++ b/foo
370 370 @@ -1,2 +1,5 @@
371 371 -hello world
372 372 +helloworld
373 373 +
374 374 +
375 375 +
376 376 goodbye world
377 377
378 378 $ hg ndiff -w
379 379 diff -r 540c40a65b78 foo
380 380 --- a/foo
381 381 +++ b/foo
382 382 @@ -1,2 +1,5 @@
383 383 hello world
384 384 +
385 385 +
386 386 +
387 387 goodbye world
388 388
389 389 >>> no diffs <<<
390 390
391 391 $ hg ndiff -wB
392 392
393
394 Test \r (carriage return) as used in "DOS" line endings:
395
396 $ printf 'hello world\r\n\r\ngoodbye\rworld\n' >foo
397
398 $ hg ndiff
399 diff -r 540c40a65b78 foo
400 --- a/foo
401 +++ b/foo
402 @@ -1,2 +1,3 @@
403 -hello world
404 -goodbye world
405 +hello world
406 +
407 +goodbye
408 world
409 world
410
411 No completely blank lines to ignore:
412
413 $ hg ndiff --ignore-blank-lines
414 diff -r 540c40a65b78 foo
415 --- a/foo
416 +++ b/foo
417 @@ -1,2 +1,3 @@
418 -hello world
419 -goodbye world
420 +hello world
421 +
422 +goodbye
423 world
424 world
425
426 Only new line noticed:
427
428 $ hg ndiff --ignore-space-change
429 diff -r 540c40a65b78 foo
430 --- a/foo
431 +++ b/foo
432 @@ -1,2 +1,3 @@
433 hello world
434 +
435 goodbye world
436
437 $ hg ndiff --ignore-all-space
438 diff -r 540c40a65b78 foo
439 --- a/foo
440 +++ b/foo
441 @@ -1,2 +1,3 @@
442 hello world
443 +
444 goodbye world
445
446 New line not noticed when space change ignored:
General Comments 0
You need to be logged in to leave comments. Login now