##// END OF EJS Templates
mdiff: explicitly compute places for the newline marker...
Joerg Sonnenberger -
r35869:a9d07bd8 default
parent child Browse files
Show More
@@ -1,495 +1,521
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
# Marker line inserted after a hunk line whose file has no trailing newline.
_missing_newline_marker = "\\ No newline at end of file\n"

# Diff and patch implementations, resolved through the module policy.
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Module-level aliases for the bdiff/mpatch primitives.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
30
32
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline; only the last line may
    lack one, when `text` itself does not end with a newline. An empty
    `text` gives an empty list.
    '''
    lines = [l + '\n' for l in text.split('\n')]
    # split() always returns at least one element, so `lines` cannot be
    # empty here. The last element is either a spurious '\n' (text ended
    # with a newline, or was empty) or a final line that was given a
    # newline it never had.
    if lines[-1] == '\n':
        lines.pop()
    else:
        lines[-1] = lines[-1][:-1]
    return lines
40
42
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace changes at the end of lines
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Every recognised option with its default value. Options missing from
    # the keyword arguments (or passed as None) fall back to these.
    # NOTE(review): 'index', 'showsimilarity' and 'worddiff' are accepted
    # but not described in the class docstring - confirm their meaning
    # with the callers before documenting them.
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        """Set one attribute per known option, defaulting unset ones.

        Raises error.Abort when `context` cannot be converted to an integer.
        """
        opts = pycompat.byteskwargs(opts)
        for k, default in self.defaults.items():
            v = opts.get(k)
            if v is None:
                v = default
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        """Return a new diffopts with `kwargs` overriding current values."""
        opts = {k: getattr(self, k) for k in self.defaults}
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)

# Shared instance carrying only default values.
defaultopts = diffopts()
94
96
def wsclean(opts, text, blank=True):
    """Return `text` normalised according to the whitespace options.

    - opts.ignorews / opts.ignorewsamount delegate to bdiff.fixws()
      (ignorews takes precedence when both are set);
    - opts.ignoreblanklines collapses runs of newlines and strips leading
      and trailing newlines, but only when `blank` is true;
    - opts.ignorewseol removes spaces, tabs, carriage returns and form
      feeds that directly precede a newline.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    # Pattern, replacement and subject must all be the same string type for
    # the re module, so every literal below is spelled as bytes explicitly.
    if blank and opts.ignoreblanklines:
        text = re.sub(b'\n+', b'\n', text).strip(b'\n')
    if opts.ignorewseol:
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
105
107
def splitblock(base1, lines1, base2, lines2, opts):
    """Yield ([a1, a2, b1, b2], btype) sub-blocks of two line sequences.

    The input lines match except for interwoven blank lines. They are
    transformed into a sequence of matching blocks (btype '=') and blank
    blocks (btype '~'). Yielded offsets are shifted by `base1` / `base2`.
    """
    # 1 marks a line with content left after wsclean(), 0 a blank one.
    lines1 = [1 if wsclean(opts, l) else 0 for l in lines1]
    lines2 = [1 if wsclean(opts, l) else 0 for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, '='
        if (i1 >= e1 or lines1[i1] == 0
            or i2 >= e2 or lines2[i2] == 0):
            # Consume the block of blank lines
            btype = '~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
131
133
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    # Both intervals are half-open, so touching endpoints do not overlap.
    return lowerbound < start + length and start < upperbound
156
158
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

    A "=" block that contains `rangeb` entirely only sets `rangea`; it is
    not added to `filteredblocks`.

    Raise error.Abort if no block provides both bounds of `rangea`, or if
    the resulting upper bound is below the lower bound.
    """
    lbb, ubb = rangeb
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == '=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            if b1 <= lbb < b2:
                if stype == '=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                if stype == '=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
            if hunkinrange((b1, (b2 - b1)), rangeb):
                filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return filteredblocks, (lba, uba)
195
197
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # The gap between the end of the previous match and the start of
        # this one.
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
                new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
                if old == new:
                    btype = '~'
            yield s, btype
        yield s1, '='
236
238
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts, check_binary=True):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    When one side is empty, `hunks` is a one-element tuple holding a single
    hunk that adds or removes the whole other side. For binary content it
    is a one-element tuple with a None hunkrange and a "Binary file ... has
    changed" line, and `headers` is empty.

    Setting `check_binary` to false will skip the binary check, i.e. when
    it has been done in advance. Files are expected to be text in this case.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and check_binary and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = (None, ['Binary file %s has changed\n' % fn1]),
    elif not a:
        # Everything in b is added. endswith() is used instead of indexing
        # so the check also holds when the text is a bytes object.
        without_newline = not b.endswith('\n')
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = (hunkrange, hunklines),
    elif not b:
        # Everything in a is removed.
        without_newline = not a.endswith('\n')
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = (hunkrange, hunklines),
    else:
        diffhunks = _unidiff(a, b, opts=opts)
        # Pull the first hunk eagerly: no hunk at all means a null diff.
        try:
            hunkrange, hunklines = next(diffhunks)
        except StopIteration:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]
        def rewindhunks():
            # Re-emit the hunk consumed above, then the remaining ones.
            yield hunkrange, hunklines
            for hr, hl in diffhunks:
                yield hr, hl

        hunks = rewindhunks()

    return headerlines, hunks
324
328
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    When a hunk includes the last line of a text that does not end with a
    newline, that line is terminated and followed by the "No newline at end
    of file" marker line.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, limit):
        ret = l + opts.context
        if ret > limit:
            ret = limit
        return ret

    def contextstart(l):
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the next function-line search stops, last function
    # line found]; a list so that yieldhunk() can update it in place.
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed.  If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        #
        # endswith()/startswith() are used rather than indexing so the
        # checks neither fail on an empty text nor compare an integer with
        # a string when the texts are bytes objects.
        skip = False
        if not t1.endswith('\n') and astart + alen == len(l1) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith(('-', ' ')):
                    if hunklines[i].startswith(' '):
                        skip = True
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if (not skip and not t2.endswith('\n')
            and bstart + blen == len(l2) + 1):
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith('+'):
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend(' ' + x for x in l1[astart:a1])
        delta.extend('-' + x for x in old)
        delta.extend('+' + x for x in new)

    if hunk:
        for x in yieldhunk(hunk):
            yield x
435
461
def b85diff(to, tn):
    '''return a base85-encoded "GIT binary patch" for the new content

    `to` and `tn` are the old and new contents; None is treated as empty.
    An empty string is returned when both contents are equal. Otherwise
    the result is a "literal" patch carrying the zlib-compressed new
    content; the old content is only used for the equality check.
    '''
    def fmtline(line):
        # The first character encodes the chunk length: 'A'-'Z' for
        # 1..26 bytes, 'a'-'z' for 27..52 bytes.
        l = len(line)
        if l <= 26:
            l = chr(ord('A') + l - 1)
        else:
            l = chr(l - 26 + ord('a') - 1)
        return '%c%s\n' % (l, util.b85encode(line, True))

    def chunk(text, csize=52):
        l = len(text)
        i = 0
        while i < l:
            yield text[i:i + csize]
            i += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    ret = ['GIT binary patch\n', 'literal %d\n' % len(tn)]
    ret.extend(fmtline(l) for l in chunk(zlib.compress(tn)))
    ret.append('\n')

    return ''.join(ret)
470
496
def patchtext(bin):
    """Return the concatenated data of every fragment in delta `bin`.

    `bin` is read as a sequence of fragments, each a 12-byte header of
    three big-endian signed 32-bit integers followed by as many data bytes
    as the third integer says. Only the data parts are kept.
    """
    pos = 0
    t = []
    while pos < len(bin):
        # p1 and p2 are not needed to extract the inserted text.
        p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    # Fragments are byte strings, so they must be joined with a bytes
    # separator.
    return b"".join(t)
480
506
def patch(a, bin):
    """Apply the single delta `bin` to text `a` and return the result."""
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
486
512
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff's matching blocks as (a_start, b_start, length) tuples."""
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
490
516
def trivialdiffheader(length):
    """Return the 12-byte header (0, 0, length), or nothing for length 0.

    The header packs three big-endian signed 32-bit integers. Both
    outcomes are byte strings, so callers can concatenate them with data.
    """
    return struct.pack(">lll", 0, 0, length) if length else b''
493
519
def replacediffheader(oldlen, newlen):
    """Return the 12-byte header (0, oldlen, newlen).

    The three values are packed as big-endian signed 32-bit integers.
    """
    return struct.pack(">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now