##// END OF EJS Templates
mdiff: remove rewindhunk by yielding a bool first to indicate data...
Joerg Sonnenberger -
r35870:6a33e81e default
parent child Browse files
Show More
@@ -1,521 +1,522 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
# Line appended after a hunk line whose source text has no trailing newline.
_missing_newline_marker = "\\ No newline at end of file\n"

# The diff/patch implementation modules are obtained through policy.importmod.
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Re-export the implementation entry points under this module's namespace.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
32
32
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline; only the last line may
    lack one (when `text` does not end with a newline). An empty `text`
    gives an empty list.
    '''
    lines = [l + '\n' for l in text.split('\n')]
    # split() always returns at least one piece, so lines[-1] exists here.
    if lines[-1] == '\n':
        # text was empty or ended with '\n': drop the artificial last entry
        lines.pop()
    else:
        # text did not end with '\n': undo the newline we just appended
        lines[-1] = lines[-1][:-1]
    return lines
42
42
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace changes at the end of lines
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    index, showsimilarity and worddiff are accepted as well (see
    ``defaults``); they are not interpreted by this class.
    '''

    # Every recognized option with the value used when the caller passes
    # nothing (or None) for it.
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        """Set one attribute per key of ``defaults``.

        A missing or None option falls back to its default. Keyword
        arguments that are not in ``defaults`` are ignored. `context` is
        coerced with int(); error.Abort is raised when that conversion
        fails with ValueError.
        """
        opts = pycompat.byteskwargs(opts)
        for k in self.defaults:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        """Return a new diffopts with the current values, overridden by
        `kwargs`."""
        opts = dict((k, getattr(self, k)) for k in self.defaults)
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)
94
94
# Module-level diffopts instance built with no overrides (all defaults).
defaultopts = diffopts()
96
96
def wsclean(opts, text, blank=True):
    """Return `text` normalized according to the whitespace options of `opts`.

    * ignorews / ignorewsamount: run the text through bdiff.fixws (with 1
      and 0 as second argument respectively; ignorews wins if both are set).
    * ignoreblanklines (only when `blank` is true): collapse runs of
      newlines into one and strip leading/trailing newlines.
    * ignorewseol: remove spaces, tabs, CR and FF that precede a newline.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    if opts.ignorewseol:
        # pattern and replacement must be the same string type: a bytes
        # pattern with a native-str replacement raises TypeError on Python 3
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
107
107
def splitblock(base1, lines1, base2, lines2, opts):
    """Yield ``([a1, a2, b1, b2], btype)`` sub-blocks of two line lists.

    The input lines match except for interwoven blank lines. We transform
    them into a sequence of matching blocks (btype '=') and blank blocks
    (btype '~'). `base1` and `base2` are added to the indices of `lines1`
    and `lines2` respectively in the yielded ranges.

    The matching loop indexes `lines2` without a bounds check, so it relies
    on the precondition above (same non-blank lines on both sides).
    """
    # Reduce each line to 1 (has content after wsclean) or 0 (blank).
    lines1 = [1 if wsclean(opts, l) else 0 for l in lines1]
    lines2 = [1 if wsclean(opts, l) else 0 for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, '='
        if (i1 >= e1 or lines1[i1] == 0
            or i2 >= e2 or lines2[i2] == 0):
            # Consume the block of blank lines
            btype = '~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
133
133
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    The hunk covers ``[start, start + length)``; it is "in" the range when
    that interval overlaps ``[lowerbound, upperbound)``.

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    return lowerbound < start + length and start < upperbound
158
158
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort when either bound of `rangea` could not be determined
    from `blocks`, or when the computed upper bound is below the lower one.
    """
    lbb, ubb = rangeb
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == '=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            if b1 <= lbb < b2:
                # lower bound falls in this block
                if stype == '=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                # upper bound falls in this block
                if stype == '=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return filteredblocks, (lba, uba)
197
197
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # the gap between the end of the previous match and the start of
        # this one
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
                new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
                if old == new:
                    btype = '~'
            yield s, btype
        yield s1, '='
238
238
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts, check_binary=True):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is an iterable
    yielding (hunkrange, hunklines) items (a generator coming from
    _unidiff() when both texts are non-empty, a one-item tuple otherwise).
    For binary content `headers` is empty and the single hunk has a
    `hunkrange` of None.
    Otherwise, `headers` and `hunks` are empty.

    `a` and `b` are the old and new texts (None selects /dev/null in the
    corresponding header), `ad` and `bd` their dates, `fn1` and `fn2` their
    file names.

    Setting `check_binary` to false will skip the binary check, i.e. when
    it has been done in advance. Files are expected to be text in this case.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and check_binary and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = ((None, ['Binary file %s has changed\n' % fn1]),)
    elif not a:
        # slice rather than index: indexing bytes gives an int on Python 3
        without_newline = b[-1:] != '\n'
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = ((hunkrange, hunklines),)
    elif not b:
        # slice rather than index: indexing bytes gives an int on Python 3
        without_newline = a[-1:] != '\n'
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        if without_newline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        hunks = ((hunkrange, hunklines),)
    else:
        hunks = _unidiff(a, b, opts=opts)
        # _unidiff() first yields a bool telling whether any hunk follows
        if not next(hunks):
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, hunks
328
320
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The very first item yielded is not a hunk but a bool: True when at
    least one hunk follows, False when there is none (in which case nothing
    else is yielded).
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, length):
        ret = l + opts.context
        if ret > length:
            ret = length
        return ret

    def contextstart(l):
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position to stop the backwards search at, last function line found]
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        #
        # First characters are taken with slices, not indices: indexing
        # bytes gives an int on Python 3 and would never compare equal.
        skip = False
        if t1[-1:] != '\n' and astart + alen == len(l1) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0:1] in ('-', ' '):
                    if hunklines[i][0:1] == ' ':
                        skip = True
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if not skip and t2[-1:] != '\n' and bstart + blen == len(l2) + 1:
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0:1] == '+':
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # announce once, before the first hunk, that hunks follow
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        yield False
461
462
def b85diff(to, tn):
    '''return a base85-encoded git binary patch turning `to` into `tn`

    None is treated as empty content. An empty string is returned when both
    contents are equal. The patch is always a "literal" one: it carries the
    zlib-compressed new content and does not depend on `to`.
    '''
    def fmtline(line):
        # one output line: a length character ('A'.. for lengths 1-26,
        # 'a'.. for lengths 27 and up) followed by the encoded payload
        l = len(line)
        if l <= 26:
            l = chr(ord('A') + l - 1)
        else:
            l = chr(l - 26 + ord('a') - 1)
        return '%c%s\n' % (l, util.b85encode(line, True))

    def chunk(text, csize=52):
        # yield successive pieces of at most csize items
        l = len(text)
        i = 0
        while i < l:
            yield text[i:i + csize]
            i += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    ret = ['GIT binary patch\n', 'literal %d\n' % len(tn)]
    ret.extend(fmtline(l) for l in chunk(zlib.compress(tn)))
    ret.append('\n')

    return ''.join(ret)
496
497
def patchtext(bin):
    """Return the concatenated data of every fragment of binary patch `bin`.

    `bin` is read as a sequence of fragments, each made of a 12-byte header
    (three big-endian signed 32-bit integers) followed by as many data bytes
    as the third header field says. Only that length field is used here.
    """
    pos = 0
    t = []
    while pos < len(bin):
        # the first two header fields are not needed to extract the data
        l = struct.unpack(">lll", bin[pos:pos + 12])[2]
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    return "".join(t)
506
507
def patch(a, bin):
    """Return the result of applying binary patch `bin` to text `a`.

    When `a` is empty the patch is not applied: the content of `bin` past
    its first 12 bytes is returned through util.buffer instead.
    """
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
512
513
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) entries as (d[0], d[2], d[1] - d[0]) tuples,
    i.e. presumably (start in a, start in b, length) as difflib does."""
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
516
517
def trivialdiffheader(length):
    """Return the packed ">lll" header (0, 0, length), or an empty string
    when `length` is zero."""
    return struct.pack(">lll", 0, 0, length) if length else ''
519
520
def replacediffheader(oldlen, newlen):
    """Return the packed ">lll" header (0, oldlen, newlen)."""
    return struct.pack(">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now