##// END OF EJS Templates
mdiff: add a __str__ method to diffopts...
Matt Harbison -
r50792:a78dfb1a default
parent child Browse files
Show More
@@ -1,555 +1,562 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import re
9 import re
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from .i18n import _
13 from .i18n import _
14 from .pycompat import (
14 from .pycompat import (
15 getattr,
15 getattr,
16 setattr,
16 setattr,
17 )
17 )
18 from . import (
18 from . import (
19 diffhelper,
19 diffhelper,
20 encoding,
20 encoding,
21 error,
21 error,
22 policy,
22 policy,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26 from .utils import dateutil
26 from .utils import dateutil
27
27
# Implementation modules are obtained through the policy layer rather than
# imported directly.
bdiff = policy.importmod('bdiff')
mpatch = policy.importmod('mpatch')

# Module-level aliases for the bdiff/mpatch entry points.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
splitnewlines = bdiff.splitnewlines
37
37
38
38
39 # TODO: this looks like it could be an attrs, which might help pytype
39 # TODO: this looks like it could be an attrs, which might help pytype
class diffopts:
    """Container for the options controlling diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    index is stored here but not consumed in this module
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace changes at the end of lines
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    showsimilarity is stored here but not consumed in this module
    worddiff is stored here but not consumed in this module
    xdiff selects bdiff.xdiffblocks (when available) to compute blocks

    Every key of ``defaults`` becomes an instance attribute.
    """

    # Attributes are created with setattr() in __init__ rather than declared
    # statically (NOTE(review): presumably a hint for static type checkers).
    _HAS_DYNAMIC_ATTRIBUTES = True

    defaults = {
        b'context': 3,
        b'text': False,
        b'showfunc': False,
        b'git': False,
        b'nodates': False,
        b'nobinary': False,
        b'noprefix': False,
        b'index': 0,
        b'ignorews': False,
        b'ignorewsamount': False,
        b'ignorewseol': False,
        b'ignoreblanklines': False,
        b'upgrade': False,
        b'showsimilarity': False,
        b'worddiff': False,
        b'xdiff': False,
    }

    def __init__(self, **opts):
        """Initialise every known option from ``opts`` or from ``defaults``.

        An option that is absent, or explicitly ``None``, takes its default.
        Keyword names not present in ``defaults`` are ignored.

        Raises error.InputError if ``context`` cannot be converted by int()
        (ValueError).
        """
        opts = pycompat.byteskwargs(opts)
        for k in self.defaults:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.InputError(
                _(b'diff context lines count must be an integer, not %r')
                % pycompat.bytestr(self.context)
            )

    def copy(self, **kwargs):
        """Return a new diffopts with the same values, overridden by kwargs."""
        opts = {k: getattr(self, k) for k in self.defaults}
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)

    def __bytes__(self):
        """Return ``name: value`` pairs for every option, comma separated."""
        return b", ".join(
            b"%s: %r" % (k, getattr(self, k)) for k in self.defaults
        )

    __str__ = encoding.strmethod(__bytes__)
103
97
104
# Shared instance holding the default options. It is the default value of
# the ``opts`` parameter of unidiff()/_unidiff() and the fallback used by
# allblocks() when ``opts`` is None.
defaultopts = diffopts()
99
106
100
107
def wsclean(opts, text, blank=True):
    """Return ``text`` normalised according to the whitespace options.

    opts -- object exposing ``ignorews``, ``ignorewsamount``,
            ``ignoreblanklines`` and ``ignorewseol``
    text -- bytes to clean
    blank -- when False, ``opts.ignoreblanklines`` is not applied

    The steps run in a fixed order: whitespace fixing (``ignorews`` takes
    precedence over ``ignorewsamount``), blank line collapsing, then
    end-of-line whitespace stripping.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        # collapse runs of newlines and drop leading/trailing ones
        text = re.sub(b'\n+', b'\n', text).strip(b'\n')
    if opts.ignorewseol:
        # drop spaces, tabs, CR and FF that directly precede a newline
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
111
118
112
119
def splitblock(base1, lines1, base2, lines2, opts):
    """Split a block into matching ('=') and blank-only ('~') sub-blocks.

    The input lines match except for interwoven blank lines. They are
    transformed into a sequence of matching blocks and blank blocks.

    base1, base2 -- offsets added to the yielded indices
    lines1, lines2 -- the lines of each side
    opts -- options passed to wsclean() to decide whether a line is blank

    Yields ``([a1, a2, b1, b2], btype)`` items.
    """
    # 1 marks a line that is non-empty after cleaning, 0 a blank one
    lines1 = [(1 if wsclean(opts, l) else 0) for l in lines1]
    lines2 = [(1 if wsclean(opts, l) else 0) for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, b'='
        if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
            # Consume the block of blank lines
            btype = b'~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
137
144
138
145
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    The hunk covers ``[start, start + length)``; it is "in" the range when
    that interval overlaps ``[lowerbound, upperbound)``.

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    return lowerbound < start + length and start < upperbound
163
170
164
171
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

    Raise error.InputError when no bound could be derived for `rangea`, or
    when the derived upper bound is below the lower bound.
    """
    lbb, ubb = rangeb
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == b'=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            if b1 <= lbb < b2:
                if stype == b'=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                if stype == b'=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.InputError(_(b'line range exceeds file size'))
    return filteredblocks, (lba, uba)
203
210
204
211
def chooseblocksfunc(opts=None):
    """Return the function used to compute matching blocks.

    ``bdiff.xdiffblocks`` is returned only when ``opts`` is given, its
    ``xdiff`` attribute is true and the loaded bdiff module provides
    ``xdiffblocks``; otherwise ``bdiff.blocks`` is returned.
    """
    if (
        opts is None
        or not opts.xdiff
        or not util.safehasattr(bdiff, b'xdiffblocks')
    ):
        return bdiff.blocks
    return bdiff.xdiffblocks
214
221
215
222
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    line1 and line2 are text1 and text2 split with splitnewlines() if
    they are already available.

    When opts is None, the module-level defaultopts is used.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = chooseblocksfunc(opts)(text1, text2)
    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file. If it starts later, old and new below will both be
    # empty and we'll continue to the next match.
    prev = [0, 0, 0, 0]
    for s1 in diff:
        # the gap between the end of the previous match and the start of
        # this one
        s = [prev[1], s1[0], prev[3], s1[2]]
        prev = s1

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = b'!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
                new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
                if old == new:
                    btype = b'~'
            yield s, btype
        yield s1, b'='
256
263
257
264
def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    Set binary=True if either a or b should be taken as a binary file. In
    that case `headers` is empty and `hunks` holds a single
    "Binary file ... has changed" entry whose hunkrange is None.

    a, b -- old and new content; when exactly one side is empty, a value of
            None on that side produces a "/dev/null" header line
    ad, bd -- dates appended to the header lines unless opts.git or
              opts.nodates is set
    fn1, fn2 -- file names, passed through util.pconvert()
    """

    def datetag(date, fn=None):
        # header suffix: the date, or a bare tab when the file name
        # contains a space, or nothing
        if not opts.git and not opts.nodates:
            return b'\t%s' % date
        if fn and b' ' in fn:
            return b'\t'
        return b''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    epoch = dateutil.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
    elif not a:
        # old side is empty: every line of b is an addition
        without_newline = not b.endswith(b'\n')
        b = splitnewlines(b)
        if a is None:
            l1 = b'--- /dev/null%s' % datetag(epoch)
        else:
            l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = ((hunkrange, hunklines),)
    elif not b:
        # new side is empty: every line of a is a removal
        without_newline = not a.endswith(b'\n')
        a = splitnewlines(a)
        l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = b'+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = ((hunkrange, hunklines),)
    else:
        hunks = _unidiff(a, b, opts=opts)
        # the first item yielded by _unidiff() is a bool telling whether
        # any hunk follows
        if not next(hunks):
            return sentinel

        headerlines = [
            b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, hunks
339
346
340
347
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: the first item yielded is True when
    at least one hunk follows and False when there is none.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(l, length):
        # last context line after `l`, clamped to `length`
        ret = l + opts.context
        if ret > length:
            ret = length
        return ret

    def contextstart(l):
        # first context line before `l`, clamped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the next function-line search stops, last function
    # line found]; a list so that yieldhunk() can update it in place
    lastfunc = [0, b'']

    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = b""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in range(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = b' ' + l1[i].rstrip()
                    # split long function name if ASCII. otherwise we have no
                    # idea where the multi-byte boundary is, so just leave it.
                    if encoding.isasciistr(func):
                        func = func[:41]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [b' ' + l1[x] for x in range(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
            for i in range(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith((b'-', b' ')):
                    if hunklines[i].startswith(b' '):
                        skip = True
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
            for i in range(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith(b'+'):
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != b'!':
            if stype == b'~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                if not has_hunks:
                    has_hunks = True
                    yield True
                yield from yieldhunk(hunk)
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # `delta` is the very list stored in the hunk, so it must be grown
        # in place
        delta.extend(b' ' + x for x in l1[astart:a1])
        delta.extend(b'-' + x for x in old)
        delta.extend(b'+' + x for x in new)

    if hunk:
        if not has_hunks:
            has_hunks = True
            yield True
        yield from yieldhunk(hunk)
    elif not has_hunks:
        yield False
488
495
489
496
def b85diff(to, tn):
    """Return a base85-encoded "GIT binary patch" turning `to` into `tn`.

    None is treated as empty content for either argument. An empty bytes
    string is returned when both contents are equal. Otherwise the result is
    a ``literal`` patch carrying the zlib-compressed new content.
    """

    def fmtline(line):
        # each output line starts with a character encoding the number of
        # raw bytes on it: 'A'..'Z' for 1..26, 'a'..'z' for 27..52
        size = len(line)
        if size <= 26:
            prefix = pycompat.bytechr(ord(b'A') + size - 1)
        else:
            prefix = pycompat.bytechr(size - 26 + ord(b'a') - 1)
        return b'%c%s\n' % (prefix, util.b85encode(line, True))

    def chunk(text, csize=52):
        # yield successive slices of at most `csize` bytes
        for start in range(0, len(text), csize):
            yield text[start : start + csize]

    if to is None:
        to = b''
    if tn is None:
        tn = b''

    if to == tn:
        return b''

    # TODO: deltas
    ret = [
        b'GIT binary patch\n',
        b'literal %d\n' % len(tn),
    ]
    ret.extend(fmtline(piece) for piece in chunk(zlib.compress(tn)))
    ret.append(b'\n')

    return b''.join(ret)
525
532
526
533
def patchtext(bin):
    """Return the concatenated data of every hunk in binary delta `bin`.

    `bin` is read as a sequence of hunks, each made of a 12-byte header
    (three big-endian signed 32-bit integers) followed by as many data
    bytes as the third header field says. Only the data is kept.

    Raises struct.error if a header is truncated.
    """
    pos = 0
    t = []
    while pos < len(bin):
        # unpack_from reads the header in place, without slicing a copy
        p1, p2, l = struct.unpack_from(b">lll", bin, pos)
        pos += 12
        t.append(bin[pos : pos + l])
        pos += l
    return b"".join(t)
536
543
537
544
def patch(a, bin):
    """Apply binary delta `bin` to `a` and return the result.

    When `a` is empty, no patching is done: the result is a buffer over
    `bin` starting after its first 12 bytes.
    """
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
543
550
544
551
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) as (a_start, b_start, length) triples.

    Each input block ``d`` is read as ``(a1, a2, b1, b2)``; the length is
    taken from the `a` side (``a2 - a1``).
    """
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
548
555
549
556
def trivialdiffheader(length):
    """Return the 12-byte delta header ``(0, 0, length)``.

    The three fields are packed as big-endian signed 32-bit integers. An
    empty bytes string is returned when `length` is zero.
    """
    return struct.pack(b">lll", 0, 0, length) if length else b''
552
559
553
560
def replacediffheader(oldlen, newlen):
    """Return the 12-byte delta header ``(0, oldlen, newlen)``.

    The three fields are packed as big-endian signed 32-bit integers.
    """
    return struct.pack(b">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now