##// END OF EJS Templates
typing: add type annotations to `mercurial/mdiff.py`...
Matt Harbison -
r52829:c6899b33 default
parent child Browse files
Show More
@@ -1,563 +1,621
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import annotations
8 from __future__ import annotations
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import typing
12 import zlib
13 import zlib
13
14
15 from typing import (
16 Iterable,
17 Iterator,
18 List,
19 Optional,
20 Sequence,
21 Tuple,
22 Union,
23 cast,
24 )
25
14 from .i18n import _
26 from .i18n import _
15 from . import (
27 from . import (
16 diffhelper,
28 diffhelper,
17 encoding,
29 encoding,
18 error,
30 error,
19 policy,
31 policy,
20 pycompat,
32 pycompat,
21 util,
33 util,
22 )
34 )
23 from .interfaces import (
35 from .interfaces import (
24 modules as intmod,
36 modules as intmod,
25 )
37 )
26
38
27 from .utils import dateutil
39 from .utils import dateutil
28
40
29 bdiff: intmod.BDiff = policy.importmod('bdiff')
41 bdiff: intmod.BDiff = policy.importmod('bdiff')
30 mpatch = policy.importmod('mpatch')
42 mpatch = policy.importmod('mpatch')
31
43
32 blocks = bdiff.blocks
44 blocks = bdiff.blocks
33 fixws = bdiff.fixws
45 fixws = bdiff.fixws
34 patches = mpatch.patches
46 patches = mpatch.patches
35 patchedsize = mpatch.patchedsize
47 patchedsize = mpatch.patchedsize
36 textdiff = bdiff.bdiff
48 textdiff = bdiff.bdiff
37 splitnewlines = bdiff.splitnewlines
49 splitnewlines = bdiff.splitnewlines
38
50
51 if typing.TYPE_CHECKING:
52 HunkLines = List[bytes]
53 """Lines of a hunk: a header, followed by line additions and deletions."""
54
55 HunkRange = Tuple[int, int, int, int]
56 """HunkRange represents the range information of a hunk.
57
58 The tuple (s1, l1, s2, l2) forms the header '@@ -s1,l1 +s2,l2 @@'."""
59
60 Range = Tuple[int, int]
61 """A (lowerbound, upperbound) range tuple."""
62
63 TypedBlock = Tuple[intmod.BDiffBlock, bytes]
64 """A bdiff block with its type."""
65
39
66
40 # TODO: this looks like it could be an attrs, which might help pytype
67 # TODO: this looks like it could be an attrs, which might help pytype
41 class diffopts:
68 class diffopts:
42 """context is the number of context lines
69 """context is the number of context lines
43 text treats all files as text
70 text treats all files as text
44 showfunc enables diff -p output
71 showfunc enables diff -p output
45 git enables the git extended patch format
72 git enables the git extended patch format
46 nodates removes dates from diff headers
73 nodates removes dates from diff headers
47 nobinary ignores binary files
74 nobinary ignores binary files
48 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
75 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
49 ignorews ignores all whitespace changes in the diff
76 ignorews ignores all whitespace changes in the diff
50 ignorewsamount ignores changes in the amount of whitespace
77 ignorewsamount ignores changes in the amount of whitespace
51 ignoreblanklines ignores changes whose lines are all blank
78 ignoreblanklines ignores changes whose lines are all blank
52 upgrade generates git diffs to avoid data loss
79 upgrade generates git diffs to avoid data loss
53 """
80 """
54
81
55 _HAS_DYNAMIC_ATTRIBUTES = True
82 _HAS_DYNAMIC_ATTRIBUTES = True
56
83
57 defaults = {
84 defaults = {
58 b'context': 3,
85 b'context': 3,
59 b'text': False,
86 b'text': False,
60 b'showfunc': False,
87 b'showfunc': False,
61 b'git': False,
88 b'git': False,
62 b'nodates': False,
89 b'nodates': False,
63 b'nobinary': False,
90 b'nobinary': False,
64 b'noprefix': False,
91 b'noprefix': False,
65 b'index': 0,
92 b'index': 0,
66 b'ignorews': False,
93 b'ignorews': False,
67 b'ignorewsamount': False,
94 b'ignorewsamount': False,
68 b'ignorewseol': False,
95 b'ignorewseol': False,
69 b'ignoreblanklines': False,
96 b'ignoreblanklines': False,
70 b'upgrade': False,
97 b'upgrade': False,
71 b'showsimilarity': False,
98 b'showsimilarity': False,
72 b'worddiff': False,
99 b'worddiff': False,
73 b'xdiff': False,
100 b'xdiff': False,
74 }
101 }
75
102
76 def __init__(self, **opts):
103 def __init__(self, **opts):
77 opts = pycompat.byteskwargs(opts)
104 opts = pycompat.byteskwargs(opts)
78 for k in self.defaults.keys():
105 for k in self.defaults.keys():
79 v = opts.get(k)
106 v = opts.get(k)
80 if v is None:
107 if v is None:
81 v = self.defaults[k]
108 v = self.defaults[k]
82 setattr(self, pycompat.sysstr(k), v)
109 setattr(self, pycompat.sysstr(k), v)
83
110
84 try:
111 try:
85 self.context = int(self.context)
112 self.context = int(self.context)
86 except ValueError:
113 except ValueError:
87 raise error.InputError(
114 raise error.InputError(
88 _(b'diff context lines count must be an integer, not %r')
115 _(b'diff context lines count must be an integer, not %r')
89 % pycompat.bytestr(self.context)
116 % pycompat.bytestr(self.context)
90 )
117 )
91
118
92 def copy(self, **kwargs):
119 def copy(self, **kwargs):
93 opts = {k: getattr(self, pycompat.sysstr(k)) for k in self.defaults}
120 opts = {k: getattr(self, pycompat.sysstr(k)) for k in self.defaults}
94 opts = pycompat.strkwargs(opts)
121 opts = pycompat.strkwargs(opts)
95 opts.update(kwargs)
122 opts.update(kwargs)
96 return diffopts(**opts)
123 return diffopts(**opts)
97
124
98 def __bytes__(self):
125 def __bytes__(self):
99 return b", ".join(
126 return b", ".join(
100 b"%s: %r" % (k, getattr(self, pycompat.sysstr(k)))
127 b"%s: %r" % (k, getattr(self, pycompat.sysstr(k)))
101 for k in self.defaults
128 for k in self.defaults
102 )
129 )
103
130
104 __str__ = encoding.strmethod(__bytes__)
131 __str__ = encoding.strmethod(__bytes__)
105
132
106
133
107 defaultopts = diffopts()
134 defaultopts = diffopts()
108
135
109
136
110 def wsclean(opts, text, blank=True):
137 def wsclean(opts: diffopts, text: bytes, blank: bool = True) -> bytes:
111 if opts.ignorews:
138 if opts.ignorews:
112 text = bdiff.fixws(text, True)
139 text = bdiff.fixws(text, True)
113 elif opts.ignorewsamount:
140 elif opts.ignorewsamount:
114 text = bdiff.fixws(text, False)
141 text = bdiff.fixws(text, False)
115 if blank and opts.ignoreblanklines:
142 if blank and opts.ignoreblanklines:
116 text = re.sub(b'\n+', b'\n', text).strip(b'\n')
143 text = re.sub(b'\n+', b'\n', text).strip(b'\n')
117 if opts.ignorewseol:
144 if opts.ignorewseol:
118 text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
145 text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
119 return text
146 return text
120
147
121
148
122 def splitblock(base1, lines1, base2, lines2, opts):
149 def splitblock(
150 base1: int,
151 lines1: Iterable[bytes],
152 base2: int,
153 lines2: Iterable[bytes],
154 opts: diffopts,
155 ) -> Iterable[TypedBlock]:
123 # The input lines match except for interwoven blank lines. We
156 # The input lines match except for interwoven blank lines. We
124 # transform them into a sequence of matching blocks and blank blocks.
157 # transform them into a sequence of matching blocks and blank blocks.
125 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
158 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
126 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
159 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
127 s1, e1 = 0, len(lines1)
160 s1, e1 = 0, len(lines1)
128 s2, e2 = 0, len(lines2)
161 s2, e2 = 0, len(lines2)
129 while s1 < e1 or s2 < e2:
162 while s1 < e1 or s2 < e2:
130 i1, i2, btype = s1, s2, b'='
163 i1, i2, btype = s1, s2, b'='
131 if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
164 if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
132 # Consume the block of blank lines
165 # Consume the block of blank lines
133 btype = b'~'
166 btype = b'~'
134 while i1 < e1 and lines1[i1] == 0:
167 while i1 < e1 and lines1[i1] == 0:
135 i1 += 1
168 i1 += 1
136 while i2 < e2 and lines2[i2] == 0:
169 while i2 < e2 and lines2[i2] == 0:
137 i2 += 1
170 i2 += 1
138 else:
171 else:
139 # Consume the matching lines
172 # Consume the matching lines
140 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
173 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
141 i1 += 1
174 i1 += 1
142 i2 += 1
175 i2 += 1
143 yield (base1 + s1, base1 + i1, base2 + s2, base2 + i2), btype
176 yield (base1 + s1, base1 + i1, base2 + s2, base2 + i2), btype
144 s1 = i1
177 s1 = i1
145 s2 = i2
178 s2 = i2
146
179
147
180
148 def hunkinrange(hunk, linerange):
181 def hunkinrange(hunk: Tuple[int, int], linerange: Range) -> bool:
149 """Return True if `hunk` defined as (start, length) is in `linerange`
182 """Return True if `hunk` defined as (start, length) is in `linerange`
150 defined as (lowerbound, upperbound).
183 defined as (lowerbound, upperbound).
151
184
152 >>> hunkinrange((5, 10), (2, 7))
185 >>> hunkinrange((5, 10), (2, 7))
153 True
186 True
154 >>> hunkinrange((5, 10), (6, 12))
187 >>> hunkinrange((5, 10), (6, 12))
155 True
188 True
156 >>> hunkinrange((5, 10), (13, 17))
189 >>> hunkinrange((5, 10), (13, 17))
157 True
190 True
158 >>> hunkinrange((5, 10), (3, 17))
191 >>> hunkinrange((5, 10), (3, 17))
159 True
192 True
160 >>> hunkinrange((5, 10), (1, 3))
193 >>> hunkinrange((5, 10), (1, 3))
161 False
194 False
162 >>> hunkinrange((5, 10), (18, 20))
195 >>> hunkinrange((5, 10), (18, 20))
163 False
196 False
164 >>> hunkinrange((5, 10), (1, 5))
197 >>> hunkinrange((5, 10), (1, 5))
165 False
198 False
166 >>> hunkinrange((5, 10), (15, 27))
199 >>> hunkinrange((5, 10), (15, 27))
167 False
200 False
168 """
201 """
169 start, length = hunk
202 start, length = hunk
170 lowerbound, upperbound = linerange
203 lowerbound, upperbound = linerange
171 return lowerbound < start + length and start < upperbound
204 return lowerbound < start + length and start < upperbound
172
205
173
206
174 def blocksinrange(blocks, rangeb):
207 def blocksinrange(
208 blocks: Iterable[TypedBlock], rangeb: Range
209 ) -> Tuple[List[TypedBlock], Range]:
175 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
210 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
176 `rangeb` from ``(b1, b2)`` point of view.
211 `rangeb` from ``(b1, b2)`` point of view.
177
212
178 Return `filteredblocks, rangea` where:
213 Return `filteredblocks, rangea` where:
179
214
180 * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
215 * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
181 `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
216 `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
182 block ``(b1, b2)`` being inside `rangeb` if
217 block ``(b1, b2)`` being inside `rangeb` if
183 ``rangeb[0] < b2 and b1 < rangeb[1]``;
218 ``rangeb[0] < b2 and b1 < rangeb[1]``;
184 * `rangea` is the line range w.r.t. the ``(a1, a2)`` parts of `blocks`.
219 * `rangea` is the line range w.r.t. the ``(a1, a2)`` parts of `blocks`.
185 """
220 """
186 lbb, ubb = rangeb
221 lbb, ubb = rangeb
187 lba, uba = None, None
222 lba, uba = None, None
188 filteredblocks = []
223 filteredblocks = []
189 for block in blocks:
224 for block in blocks:
190 (a1, a2, b1, b2), stype = block
225 (a1, a2, b1, b2), stype = block
191 if lbb >= b1 and ubb <= b2 and stype == b'=':
226 if lbb >= b1 and ubb <= b2 and stype == b'=':
192 # rangeb is within a single "=" hunk, restrict back linerange1
227 # rangeb is within a single "=" hunk, restrict back linerange1
193 # by offsetting rangeb
228 # by offsetting rangeb
194 lba = lbb - b1 + a1
229 lba = lbb - b1 + a1
195 uba = ubb - b1 + a1
230 uba = ubb - b1 + a1
196 else:
231 else:
197 if b1 <= lbb < b2:
232 if b1 <= lbb < b2:
198 if stype == b'=':
233 if stype == b'=':
199 lba = a2 - (b2 - lbb)
234 lba = a2 - (b2 - lbb)
200 else:
235 else:
201 lba = a1
236 lba = a1
202 if b1 < ubb <= b2:
237 if b1 < ubb <= b2:
203 if stype == b'=':
238 if stype == b'=':
204 uba = a1 + (ubb - b1)
239 uba = a1 + (ubb - b1)
205 else:
240 else:
206 uba = a2
241 uba = a2
207 if hunkinrange((b1, (b2 - b1)), rangeb):
242 if hunkinrange((b1, (b2 - b1)), rangeb):
208 filteredblocks.append(block)
243 filteredblocks.append(block)
209 if lba is None or uba is None or uba < lba:
244 if lba is None or uba is None or uba < lba:
210 raise error.InputError(_(b'line range exceeds file size'))
245 raise error.InputError(_(b'line range exceeds file size'))
211 return filteredblocks, (lba, uba)
246 return filteredblocks, (lba, uba)
212
247
213
248
214 def chooseblocksfunc(opts=None):
249 def chooseblocksfunc(opts: Optional[diffopts] = None) -> intmod.BDiffBlocksFnc:
215 if (
250 if (
216 opts is None
251 opts is None
217 or not opts.xdiff
252 or not opts.xdiff
218 or not getattr(bdiff, 'xdiffblocks', None)
253 or not getattr(bdiff, 'xdiffblocks', None)
219 ):
254 ):
220 return bdiff.blocks
255 return bdiff.blocks
221 else:
256 else:
222 return bdiff.xdiffblocks
257 return bdiff.xdiffblocks
223
258
224
259
225 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
260 def allblocks(
261 text1: bytes,
262 text2: bytes,
263 opts: Optional[diffopts] = None,
264 lines1: Optional[Sequence[bytes]] = None,
265 lines2: Optional[Sequence[bytes]] = None,
266 ) -> Iterable[TypedBlock]:
226 """Return (block, type) tuples, where block is an mdiff.blocks
267 """Return (block, type) tuples, where block is an mdiff.blocks
227 line entry. type is '=' for blocks matching exactly one another
268 line entry. type is '=' for blocks matching exactly one another
228 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
269 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
229 matching only after having filtered blank lines.
270 matching only after having filtered blank lines.
230 lines1 and lines2 are text1 and text2 split with splitnewlines() if
271 lines1 and lines2 are text1 and text2 split with splitnewlines() if
231 they are already available.
272 they are already available.
232 """
273 """
233 if opts is None:
274 if opts is None:
234 opts = defaultopts
275 opts = defaultopts
235 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
276 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
236 text1 = wsclean(opts, text1, False)
277 text1 = wsclean(opts, text1, False)
237 text2 = wsclean(opts, text2, False)
278 text2 = wsclean(opts, text2, False)
238 diff = chooseblocksfunc(opts)(text1, text2)
279 diff = chooseblocksfunc(opts)(text1, text2)
239 for i, s1 in enumerate(diff):
280 for i, s1 in enumerate(diff):
240 # The first match is special.
281 # The first match is special.
241 # we've either found a match starting at line 0 or a match later
282 # we've either found a match starting at line 0 or a match later
242 # in the file. If it starts later, old and new below will both be
283 # in the file. If it starts later, old and new below will both be
243 # empty and we'll continue to the next match.
284 # empty and we'll continue to the next match.
244 if i > 0:
285 if i > 0:
245 s = diff[i - 1]
286 s = diff[i - 1]
246 else:
287 else:
247 s = (0, 0, 0, 0)
288 s = (0, 0, 0, 0)
248 s = (s[1], s1[0], s[3], s1[2])
289 s = (s[1], s1[0], s[3], s1[2])
249
290
250 # bdiff sometimes gives huge matches past eof, this check eats them,
291 # bdiff sometimes gives huge matches past eof, this check eats them,
251 # and deals with the special first match case described above
292 # and deals with the special first match case described above
252 if s[0] != s[1] or s[2] != s[3]:
293 if s[0] != s[1] or s[2] != s[3]:
253 type = b'!'
294 type = b'!'
254 if opts.ignoreblanklines:
295 if opts.ignoreblanklines:
255 if lines1 is None:
296 if lines1 is None:
256 lines1 = splitnewlines(text1)
297 lines1 = splitnewlines(text1)
257 if lines2 is None:
298 if lines2 is None:
258 lines2 = splitnewlines(text2)
299 lines2 = splitnewlines(text2)
259 old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
300 old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
260 new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
301 new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
261 if old == new:
302 if old == new:
262 type = b'~'
303 type = b'~'
263 yield s, type
304 yield s, type
264 yield s1, b'='
305 yield s1, b'='
265
306
266
307
267 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
308 def unidiff(
309 a: bytes,
310 ad: bytes,
311 b: bytes,
312 bd: bytes,
313 fn1: bytes,
314 fn2: bytes,
315 binary: bool,
316 opts: diffopts = defaultopts,
317 ) -> Tuple[List[bytes], Iterable[Tuple[Optional[HunkRange], HunkLines]]]:
268 """Return a unified diff as a (headers, hunks) tuple.
318 """Return a unified diff as a (headers, hunks) tuple.
269
319
270 If the diff is not null, `headers` is a list with unified diff header
320 If the diff is not null, `headers` is a list with unified diff header
271 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
321 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
272 (hunkrange, hunklines) coming from _unidiff().
322 (hunkrange, hunklines) coming from _unidiff().
273 Otherwise, `headers` and `hunks` are empty.
323 Otherwise, `headers` and `hunks` are empty.
274
324
275 Set binary=True if either a or b should be taken as a binary file.
325 Set binary=True if either a or b should be taken as a binary file.
276 """
326 """
277
327
278 def datetag(date, fn=None):
328 def datetag(date: bytes, fn: Optional[bytes] = None):
279 if not opts.git and not opts.nodates:
329 if not opts.git and not opts.nodates:
280 return b'\t%s' % date
330 return b'\t%s' % date
281 if fn and b' ' in fn:
331 if fn and b' ' in fn:
282 return b'\t'
332 return b'\t'
283 return b''
333 return b''
284
334
285 sentinel = [], ()
335 sentinel = [], ()
286 if not a and not b:
336 if not a and not b:
287 return sentinel
337 return sentinel
288
338
289 if opts.noprefix:
339 if opts.noprefix:
290 aprefix = bprefix = b''
340 aprefix = bprefix = b''
291 else:
341 else:
292 aprefix = b'a/'
342 aprefix = b'a/'
293 bprefix = b'b/'
343 bprefix = b'b/'
294
344
295 epoch = dateutil.datestr((0, 0))
345 epoch = dateutil.datestr((0, 0))
296
346
297 fn1 = util.pconvert(fn1)
347 fn1 = util.pconvert(fn1)
298 fn2 = util.pconvert(fn2)
348 fn2 = util.pconvert(fn2)
299
349
300 if binary:
350 if binary:
301 if a and b and len(a) == len(b) and a == b:
351 if a and b and len(a) == len(b) and a == b:
302 return sentinel
352 return sentinel
303 headerlines = []
353 headerlines = []
304 hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
354 hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
305 elif not a:
355 elif not a:
306 without_newline = not b.endswith(b'\n')
356 without_newline = not b.endswith(b'\n')
307 b = splitnewlines(b)
357 b = splitnewlines(b)
308 if a is None:
358 if a is None:
309 l1 = b'--- /dev/null%s' % datetag(epoch)
359 l1 = b'--- /dev/null%s' % datetag(epoch)
310 else:
360 else:
311 l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
361 l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
312 l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
362 l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
313 headerlines = [l1, l2]
363 headerlines = [l1, l2]
314 size = len(b)
364 size = len(b)
315 hunkrange = (0, 0, 1, size)
365 hunkrange = (0, 0, 1, size)
316 hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
366 hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
317 if without_newline:
367 if without_newline:
318 hunklines[-1] += b'\n'
368 hunklines[-1] += b'\n'
319 hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
369 hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
320 hunks = ((hunkrange, hunklines),)
370 hunks = ((hunkrange, hunklines),)
321 elif not b:
371 elif not b:
322 without_newline = not a.endswith(b'\n')
372 without_newline = not a.endswith(b'\n')
323 a = splitnewlines(a)
373 a = splitnewlines(a)
324 l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
374 l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
325 if b is None:
375 if b is None:
326 l2 = b'+++ /dev/null%s' % datetag(epoch)
376 l2 = b'+++ /dev/null%s' % datetag(epoch)
327 else:
377 else:
328 l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
378 l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
329 headerlines = [l1, l2]
379 headerlines = [l1, l2]
330 size = len(a)
380 size = len(a)
331 hunkrange = (1, size, 0, 0)
381 hunkrange = (1, size, 0, 0)
332 hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
382 hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
333 if without_newline:
383 if without_newline:
334 hunklines[-1] += b'\n'
384 hunklines[-1] += b'\n'
335 hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
385 hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
336 hunks = ((hunkrange, hunklines),)
386 hunks = ((hunkrange, hunklines),)
337 else:
387 else:
338 hunks = _unidiff(a, b, opts=opts)
388 hunks = _unidiff(a, b, opts=opts)
339 if not next(hunks):
389 if not next(hunks):
340 return sentinel
390 return sentinel
341
391
342 headerlines = [
392 headerlines = [
343 b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
393 b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
344 b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
394 b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
345 ]
395 ]
346
396
347 return headerlines, hunks
397 # The possible bool is consumed from the iterator above in the `next()`
398 # call.
399 return headerlines, cast(
400 "Iterable[Tuple[Optional[HunkRange], HunkLines]]", hunks
401 )
348
402
349
403
350 def _unidiff(t1, t2, opts=defaultopts):
404 def _unidiff(
405 t1: bytes, t2: bytes, opts: diffopts = defaultopts
406 ) -> Iterator[Union[bool, Tuple[HunkRange, HunkLines]]]:
351 """Yield hunks of a headerless unified diff from t1 and t2 texts.
407 """Yield hunks of a headerless unified diff from t1 and t2 texts.
352
408
353 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
409 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
354 tuple (s1, l1, s2, l2) representing the range information of the hunk to
410 tuple (s1, l1, s2, l2) representing the range information of the hunk to
355 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
411 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
356 of the hunk combining said header followed by line additions and
412 of the hunk combining said header followed by line additions and
357 deletions.
413 deletions.
358
414
359 The hunks are prefixed with a bool.
415 The hunks are prefixed with a bool.
360 """
416 """
361 l1 = splitnewlines(t1)
417 l1 = splitnewlines(t1)
362 l2 = splitnewlines(t2)
418 l2 = splitnewlines(t2)
363
419
364 def contextend(l, len):
420 def contextend(l, len):
365 ret = l + opts.context
421 ret = l + opts.context
366 if ret > len:
422 if ret > len:
367 ret = len
423 ret = len
368 return ret
424 return ret
369
425
370 def contextstart(l):
426 def contextstart(l):
371 ret = l - opts.context
427 ret = l - opts.context
372 if ret < 0:
428 if ret < 0:
373 return 0
429 return 0
374 return ret
430 return ret
375
431
376 lastfunc = [0, b'']
432 lastfunc = [0, b'']
377
433
378 def yieldhunk(hunk):
434 def yieldhunk(
435 hunk: Tuple[int, int, int, int, List[bytes]]
436 ) -> Iterable[Tuple[HunkRange, HunkLines]]:
379 (astart, a2, bstart, b2, delta) = hunk
437 (astart, a2, bstart, b2, delta) = hunk
380 aend = contextend(a2, len(l1))
438 aend = contextend(a2, len(l1))
381 alen = aend - astart
439 alen = aend - astart
382 blen = b2 - bstart + aend - a2
440 blen = b2 - bstart + aend - a2
383
441
384 func = b""
442 func = b""
385 if opts.showfunc:
443 if opts.showfunc:
386 lastpos, func = lastfunc
444 lastpos, func = lastfunc
387 # walk backwards from the start of the context up to the start of
445 # walk backwards from the start of the context up to the start of
388 # the previous hunk context until we find a line starting with an
446 # the previous hunk context until we find a line starting with an
389 # alphanumeric char.
447 # alphanumeric char.
390 for i in range(astart - 1, lastpos - 1, -1):
448 for i in range(astart - 1, lastpos - 1, -1):
391 if l1[i][0:1].isalnum():
449 if l1[i][0:1].isalnum():
392 func = b' ' + l1[i].rstrip()
450 func = b' ' + l1[i].rstrip()
393 # split long function name if ASCII. otherwise we have no
451 # split long function name if ASCII. otherwise we have no
394 # idea where the multi-byte boundary is, so just leave it.
452 # idea where the multi-byte boundary is, so just leave it.
395 if encoding.isasciistr(func):
453 if encoding.isasciistr(func):
396 func = func[:41]
454 func = func[:41]
397 lastfunc[1] = func
455 lastfunc[1] = func
398 break
456 break
399 # by recording this hunk's starting point as the next place to
457 # by recording this hunk's starting point as the next place to
400 # start looking for function lines, we avoid reading any line in
458 # start looking for function lines, we avoid reading any line in
401 # the file more than once.
459 # the file more than once.
402 lastfunc[0] = astart
460 lastfunc[0] = astart
403
461
404 # zero-length hunk ranges report their start line as one less
462 # zero-length hunk ranges report their start line as one less
405 if alen:
463 if alen:
406 astart += 1
464 astart += 1
407 if blen:
465 if blen:
408 bstart += 1
466 bstart += 1
409
467
410 hunkrange = astart, alen, bstart, blen
468 hunkrange = astart, alen, bstart, blen
411 hunklines = (
469 hunklines = (
412 [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
470 [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
413 + delta
471 + delta
414 + [b' ' + l1[x] for x in range(a2, aend)]
472 + [b' ' + l1[x] for x in range(a2, aend)]
415 )
473 )
416 # If either file ends without a newline and the last line of
474 # If either file ends without a newline and the last line of
417 # that file is part of a hunk, a marker is printed. If the
475 # that file is part of a hunk, a marker is printed. If the
418 # last line of both files is identical and neither ends in
476 # last line of both files is identical and neither ends in
419 # a newline, print only one marker. That's the only case in
477 # a newline, print only one marker. That's the only case in
420 # which the hunk can end in a shared line without a newline.
478 # which the hunk can end in a shared line without a newline.
421 skip = False
479 skip = False
422 if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
480 if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
423 for i in range(len(hunklines) - 1, -1, -1):
481 for i in range(len(hunklines) - 1, -1, -1):
424 if hunklines[i].startswith((b'-', b' ')):
482 if hunklines[i].startswith((b'-', b' ')):
425 if hunklines[i].startswith(b' '):
483 if hunklines[i].startswith(b' '):
426 skip = True
484 skip = True
427 hunklines[i] += b'\n'
485 hunklines[i] += b'\n'
428 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
486 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
429 break
487 break
430 if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
488 if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
431 for i in range(len(hunklines) - 1, -1, -1):
489 for i in range(len(hunklines) - 1, -1, -1):
432 if hunklines[i].startswith(b'+'):
490 if hunklines[i].startswith(b'+'):
433 hunklines[i] += b'\n'
491 hunklines[i] += b'\n'
434 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
492 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
435 break
493 break
436 yield hunkrange, hunklines
494 yield hunkrange, hunklines
437
495
438 # bdiff.blocks gives us the matching sequences in the files. The loop
496 # bdiff.blocks gives us the matching sequences in the files. The loop
439 # below finds the spaces between those matching sequences and translates
497 # below finds the spaces between those matching sequences and translates
440 # them into diff output.
498 # them into diff output.
441 #
499 #
442 hunk = None
500 hunk = None
443 ignoredlines = 0
501 ignoredlines = 0
444 has_hunks = False
502 has_hunks = False
445 for s, stype in allblocks(t1, t2, opts, l1, l2):
503 for s, stype in allblocks(t1, t2, opts, l1, l2):
446 a1, a2, b1, b2 = s
504 a1, a2, b1, b2 = s
447 if stype != b'!':
505 if stype != b'!':
448 if stype == b'~':
506 if stype == b'~':
449 # The diff context lines are based on t1 content. When
507 # The diff context lines are based on t1 content. When
450 # blank lines are ignored, the new lines offsets must
508 # blank lines are ignored, the new lines offsets must
451 # be adjusted as if equivalent blocks ('~') had the
509 # be adjusted as if equivalent blocks ('~') had the
452 # same sizes on both sides.
510 # same sizes on both sides.
453 ignoredlines += (b2 - b1) - (a2 - a1)
511 ignoredlines += (b2 - b1) - (a2 - a1)
454 continue
512 continue
455 delta = []
513 delta = []
456 old = l1[a1:a2]
514 old = l1[a1:a2]
457 new = l2[b1:b2]
515 new = l2[b1:b2]
458
516
459 b1 -= ignoredlines
517 b1 -= ignoredlines
460 b2 -= ignoredlines
518 b2 -= ignoredlines
461 astart = contextstart(a1)
519 astart = contextstart(a1)
462 bstart = contextstart(b1)
520 bstart = contextstart(b1)
463 prev = None
521 prev = None
464 if hunk:
522 if hunk:
465 # join with the previous hunk if it falls inside the context
523 # join with the previous hunk if it falls inside the context
466 if astart < hunk[1] + opts.context + 1:
524 if astart < hunk[1] + opts.context + 1:
467 prev = hunk
525 prev = hunk
468 astart = hunk[1]
526 astart = hunk[1]
469 bstart = hunk[3]
527 bstart = hunk[3]
470 else:
528 else:
471 if not has_hunks:
529 if not has_hunks:
472 has_hunks = True
530 has_hunks = True
473 yield True
531 yield True
474 for x in yieldhunk(hunk):
532 for x in yieldhunk(hunk):
475 yield x
533 yield x
476 if prev:
534 if prev:
477 # we've joined the previous hunk, record the new ending points.
535 # we've joined the previous hunk, record the new ending points.
478 hunk = (hunk[0], a2, hunk[2], b2, hunk[4])
536 hunk = (hunk[0], a2, hunk[2], b2, hunk[4])
479 delta = hunk[4]
537 delta = hunk[4]
480 else:
538 else:
481 # create a new hunk
539 # create a new hunk
482 hunk = (astart, a2, bstart, b2, delta)
540 hunk = (astart, a2, bstart, b2, delta)
483
541
484 delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
542 delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
485 delta[len(delta) :] = [b'-' + x for x in old]
543 delta[len(delta) :] = [b'-' + x for x in old]
486 delta[len(delta) :] = [b'+' + x for x in new]
544 delta[len(delta) :] = [b'+' + x for x in new]
487
545
488 if hunk:
546 if hunk:
489 if not has_hunks:
547 if not has_hunks:
490 has_hunks = True
548 has_hunks = True
491 yield True
549 yield True
492 for x in yieldhunk(hunk):
550 for x in yieldhunk(hunk):
493 yield x
551 yield x
494 elif not has_hunks:
552 elif not has_hunks:
495 yield False
553 yield False
496
554
497
555
498 def b85diff(to, tn):
556 def b85diff(to: Optional[bytes], tn: Optional[bytes]) -> bytes:
499 '''return a base85-encoded binary diff'''
557 '''return a base85-encoded binary diff'''
500
558
501 def fmtline(line):
559 def fmtline(line):
502 l = len(line)
560 l = len(line)
503 if l <= 26:
561 if l <= 26:
504 l = pycompat.bytechr(ord(b'A') + l - 1)
562 l = pycompat.bytechr(ord(b'A') + l - 1)
505 else:
563 else:
506 l = pycompat.bytechr(l - 26 + ord(b'a') - 1)
564 l = pycompat.bytechr(l - 26 + ord(b'a') - 1)
507 return b'%c%s\n' % (l, util.b85encode(line, True))
565 return b'%c%s\n' % (l, util.b85encode(line, True))
508
566
509 def chunk(text, csize=52):
567 def chunk(text, csize=52):
510 l = len(text)
568 l = len(text)
511 i = 0
569 i = 0
512 while i < l:
570 while i < l:
513 yield text[i : i + csize]
571 yield text[i : i + csize]
514 i += csize
572 i += csize
515
573
516 if to is None:
574 if to is None:
517 to = b''
575 to = b''
518 if tn is None:
576 if tn is None:
519 tn = b''
577 tn = b''
520
578
521 if to == tn:
579 if to == tn:
522 return b''
580 return b''
523
581
524 # TODO: deltas
582 # TODO: deltas
525 ret = []
583 ret = []
526 ret.append(b'GIT binary patch\n')
584 ret.append(b'GIT binary patch\n')
527 ret.append(b'literal %d\n' % len(tn))
585 ret.append(b'literal %d\n' % len(tn))
528 for l in chunk(zlib.compress(tn)):
586 for l in chunk(zlib.compress(tn)):
529 ret.append(fmtline(l))
587 ret.append(fmtline(l))
530 ret.append(b'\n')
588 ret.append(b'\n')
531
589
532 return b''.join(ret)
590 return b''.join(ret)
533
591
534
592
535 def patchtext(bin):
593 def patchtext(bin: bytes) -> bytes:
536 pos = 0
594 pos = 0
537 t = []
595 t = []
538 while pos < len(bin):
596 while pos < len(bin):
539 p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])
597 p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])
540 pos += 12
598 pos += 12
541 t.append(bin[pos : pos + l])
599 t.append(bin[pos : pos + l])
542 pos += l
600 pos += l
543 return b"".join(t)
601 return b"".join(t)
544
602
545
603
546 def patch(a, bin):
604 def patch(a, bin):
547 if len(a) == 0:
605 if len(a) == 0:
548 # skip over trivial delta header
606 # skip over trivial delta header
549 return util.buffer(bin, 12)
607 return util.buffer(bin, 12)
550 return mpatch.patches(a, [bin])
608 return mpatch.patches(a, [bin])
551
609
552
610
553 # similar to difflib.SequenceMatcher.get_matching_blocks
611 # similar to difflib.SequenceMatcher.get_matching_blocks
554 def get_matching_blocks(a, b):
612 def get_matching_blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int]]:
555 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
613 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
556
614
557
615
558 def trivialdiffheader(length):
616 def trivialdiffheader(length: int) -> bytes:
559 return struct.pack(b">lll", 0, 0, length) if length else b''
617 return struct.pack(b">lll", 0, 0, length) if length else b''
560
618
561
619
562 def replacediffheader(oldlen, newlen):
620 def replacediffheader(oldlen: int, newlen: int) -> bytes:
563 return struct.pack(b">lll", 0, oldlen, newlen)
621 return struct.pack(b">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now