##// END OF EJS Templates
py3: use pycompat.bytechr instead of chr...
Pulkit Goyal -
r35961:bdb6ec90 default
parent child Browse files
Show More
@@ -1,522 +1,522 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
# Marker line added to a hunk after the last line of a file that does not
# end with a newline.
_missing_newline_marker = "\\ No newline at end of file\n"

# Diff/patch backends obtained through policy.importmod().
# NOTE(review): presumably this picks between alternative implementations of
# each module according to the module policy -- confirm in policy.py.
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# Module-level aliases exposing the backend entry points under mdiff.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
32
32
def splitnewlines(text):
    '''Split text into lines on newline characters only.

    Unlike str.splitlines, no other line-boundary character is treated as a
    separator.  Every returned line keeps its trailing newline; a final
    fragment that is not newline-terminated is returned as is, and an empty
    text gives an empty list.
    '''
    pieces = text.split('\n')
    # Whatever follows the last newline is the (possibly empty) unterminated
    # tail; everything before it was terminated by a newline.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
42
42
class diffopts(object):
    '''Options controlling how diffs are computed and rendered.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace found right before a newline
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    Every key of `defaults` becomes an instance attribute; an option that is
    not passed (or passed as None) takes its default value.
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            value = opts.get(name)
            setattr(self, name, fallback if value is None else value)

        # context may have been supplied as a string; normalize it to an int
        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the same values, overridden by kwargs.'''
        current = {}
        for name in self.defaults:
            current[name] = getattr(self, name)
        merged = pycompat.strkwargs(current)
        merged.update(kwargs)
        return diffopts(**merged)
94
94
# Shared diffopts instance carrying only the default values; used as the
# fallback when a caller does not supply its own options.
defaultopts = diffopts()
96
96
def wsclean(opts, text, blank=True):
    """Return text normalized according to the whitespace options in opts.

    opts is read for the attributes ignorews, ignorewsamount,
    ignoreblanklines and ignorewseol:

    - ignorews / ignorewsamount: text is passed through bdiff.fixws (with
      flag 1 or 0 respectively; ignorews takes precedence)
    - ignoreblanklines (only when blank is true): runs of newlines are
      collapsed to one and leading/trailing newlines are stripped
    - ignorewseol: spaces, tabs, carriage returns and form feeds found
      right before a newline are removed
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    if opts.ignorewseol:
        # The pattern is explicitly bytes, so the replacement must be bytes
        # as well: a native str replacement raises TypeError on Python 3.
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
107
107
def splitblock(base1, lines1, base2, lines2, opts):
    """Split two line lists that match up to blank lines into sub-blocks.

    The input lines are expected to match except for interwoven blank
    lines.  Yield ``([a1, a2, b1, b2], btype)`` items, where the indices are
    offset by base1/base2 and btype is '=' for a run of matching non-blank
    lines or '~' for a run of blank lines.
    """
    # 1 marks a line that still has content after wsclean(), 0 a blank one
    flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
    flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
    end1 = len(flags1)
    end2 = len(flags2)
    pos1 = pos2 = 0
    while pos1 < end1 or pos2 < end2:
        nxt1, nxt2 = pos1, pos2
        atblank = (nxt1 >= end1 or flags1[nxt1] == 0
                   or nxt2 >= end2 or flags2[nxt2] == 0)
        if atblank:
            # Consume the block of blank lines
            btype = '~'
            while nxt1 < end1 and flags1[nxt1] == 0:
                nxt1 += 1
            while nxt2 < end2 and flags2[nxt2] == 0:
                nxt2 += 1
        else:
            # Consume the matching lines
            btype = '='
            while nxt1 < end1 and flags1[nxt1] == 1 and flags2[nxt2] == 1:
                nxt1 += 1
                nxt2 += 1
        yield [base1 + pos1, base1 + nxt1, base2 + pos2, base2 + nxt2], btype
        pos1, pos2 = nxt1, nxt2
133
133
def hunkinrange(hunk, linerange):
    """Tell whether `hunk`, given as (start, length), overlaps `linerange`,
    given as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    hunkstart, hunklen = hunk
    low, high = linerange
    hunkend = hunkstart + hunklen
    # both comparisons are strict: ranges that merely touch do not overlap
    return low < hunkend and hunkstart < high
158
158
def blocksinrange(blocks, rangeb):
    """Keep the `blocks` items, shaped ``(a1, a2, b1, b2), stype``, that
    fall inside line range `rangeb` as seen from the ``(b1, b2)`` side.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is the list of items of `blocks` whose ``(b1, b2)``
      part is inside `rangeb`, i.e. for which
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the corresponding line range on the ``(a1, a2)`` side.

    Raise error.Abort when no bound could be determined on the "a" side or
    when the resulting upper bound is below the lower bound.
    """
    lbb, ubb = rangeb
    lba = uba = None
    kept = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        matching = stype == '='
        if matching and b1 <= lbb and ubb <= b2:
            # rangeb is within a single "=" hunk: map it back to the "a"
            # side by applying the same offset to both bounds
            lba = a1 + (lbb - b1)
            uba = a1 + (ubb - b1)
        else:
            if b1 <= lbb < b2:
                # lower bound falls in this block
                lba = a2 - (b2 - lbb) if matching else a1
            if b1 < ubb <= b2:
                # upper bound falls in this block
                uba = a1 + (ubb - b1) if matching else a2
        if hunkinrange((b1, b2 - b1), rangeb):
            kept.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lba, uba)
197
197
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) tuples, where block is an mdiff.blocks line
    entry.

    type is '=' for blocks matching exactly one another (bdiff blocks),
    '!' for non-matching blocks and '~' for blocks matching only after
    having filtered blank lines.

    lines1 and lines2 are text1 and text2 split with splitnewlines(); pass
    them when they are already available.  opts defaults to defaultopts.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    matches = bdiff.blocks(text1, text2)
    # The first match is special: it either starts at line 0 or later in the
    # file.  Pretending an empty match precedes it lets the gap computation
    # below handle both cases.
    previous = [0, 0, 0, 0]
    for match in matches:
        # region lying between the end of the previous match and the start
        # of this one
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and skips the empty gap in front of a match starting at line 0
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield match, '='
238
238
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts, check_binary=True):
    """Return a unified diff as a (headers, hunks) tuple.

    a and b are the old and new contents (None standing for a missing
    file), ad and bd their dates, fn1 and fn2 their file names.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is an iterable of
    (hunkrange, hunklines) pairs, as produced by _unidiff().  Otherwise,
    `headers` and `hunks` are empty.  For binary content `headers` is empty
    and the single hunk has a None range.

    Setting `check_binary` to false will skip the binary check, i.e. when
    it has been done in advance. Files are expected to be text in this case.
    """
    def datetag(date, fn=None):
        # suffix placed after a file name in a header line
        if not (opts.git or opts.nodates):
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    def fullhunk(text, sign):
        # hunk adding (sign '+') or removing (sign '-') every line of text
        nonewline = text[-1:] != '\n'
        lines = splitnewlines(text)
        size = len(lines)
        if sign == '+':
            hunkrange = (0, 0, 1, size)
            header = "@@ -0,0 +1,%d @@\n" % size
        else:
            hunkrange = (1, size, 0, 0)
            header = "@@ -1,%d +0,0 @@\n" % size
        hunklines = [header] + [sign + e for e in lines]
        if nonewline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        return hunkrange, hunklines

    nodiff = [], ()
    if not (a or b):
        return nodiff

    if opts.noprefix:
        aprefix, bprefix = '', ''
    else:
        aprefix, bprefix = 'a/', 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and check_binary and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return nodiff
        headerlines = []
        hunks = ((None, ['Binary file %s has changed\n' % fn1]),)
    elif not a:
        # everything in b is new
        if a is None:
            fromline = '--- /dev/null%s' % datetag(epoch)
        else:
            fromline = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        toline = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [fromline, toline]
        hunks = (fullhunk(b, '+'),)
    elif not b:
        # everything in a is gone
        fromline = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            toline = '+++ /dev/null%s' % datetag(epoch)
        else:
            toline = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [fromline, toline]
        hunks = (fullhunk(a, '-'),)
    else:
        hunks = _unidiff(a, b, opts=opts)
        # _unidiff() first yields a bool telling whether any hunk follows
        if not next(hunks):
            return nodiff

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, hunks
320
320
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: the very first item yielded is True
    when at least one hunk follows and False when there is none, so callers
    must consume that item before iterating over the hunks.

    `opts` supplies `context` (number of context lines) and `showfunc`, and
    is passed on to allblocks().
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, len):
        # end of the trailing context after line l, clamped to len
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # start of the leading context before line l, clamped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the next function-line search stops, last function
    # text found]; a list so that yieldhunk() can update it in place
    lastfunc = [0, '']
    def yieldhunk(hunk):
        # hunk is [astart, a2, bstart, b2, delta]: 0-based start/end indices
        # on each side plus the hunk lines accumulated so far
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the trailing context adds the same number of lines on both sides
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if t1[-1:] != '\n' and astart + alen == len(l1) + 1:
            # find the last line coming from t1 (removed or context)
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0:1] in ('-', ' '):
                    if hunklines[i][0:1] == ' ':
                        # shared last line: one marker covers both files
                        skip = True
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        if not skip and t2[-1:] != '\n' and bstart + blen == len(l2) + 1:
            # find the last line added from t2
            for i in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[i][0:1] == '+':
                    hunklines[i] += '\n'
                    hunklines.insert(i + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        # slice before b1/b2 are shifted below: these index the real l2
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # the pending hunk is complete: emit it, preceded by the
                # leading True if nothing has been emitted yet
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading context (or the lines separating this change from the
        # hunk it was joined with), then removed lines, then added lines
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    if hunk:
        # emit the last pending hunk
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        # no difference at all: the leading bool is the only item yielded
        yield False
462
462
def b85diff(to, tn):
    '''Return a base85-encoded "GIT binary patch" turning `to` into `tn`.

    None is treated as empty content.  An empty string is returned when both
    contents are equal.  The patch is always a "literal" one: it carries the
    whole zlib-compressed new content, in lines encoding at most 52 bytes
    each.
    '''
    def fmtline(line):
        # the line starts with a letter encoding its length:
        # 'A'-'Z' for 1 to 26 bytes, 'a'-'z' for 27 to 52 bytes
        size = len(line)
        if size <= 26:
            prefix = pycompat.bytechr(ord('A') + size - 1)
        else:
            prefix = pycompat.bytechr(size - 26 + ord('a') - 1)
        return '%c%s\n' % (prefix, util.b85encode(line, True))

    def chunk(text, csize=52):
        # successive slices of at most csize bytes
        total = len(text)
        start = 0
        while start < total:
            yield text[start:start + csize]
            start += csize

    to = '' if to is None else to
    tn = '' if tn is None else tn

    if to == tn:
        return ''

    # TODO: deltas
    out = ['GIT binary patch\n', 'literal %d\n' % len(tn)]
    out.extend(fmtline(piece) for piece in chunk(zlib.compress(tn)))
    out.append('\n')

    return ''.join(out)
497
497
def patchtext(bin):
    """Return the concatenated data of all fragments of binary delta `bin`.

    `bin` is read as a sequence of fragments, each made of a 12-byte header
    (three big-endian signed 32-bit integers) followed by as many data bytes
    as the third header field says.  Only that length field is used here.
    """
    fragments = []
    total = len(bin)
    offset = 0
    while offset < total:
        datastart = offset + 12
        _p1, _p2, size = struct.unpack(">lll", bin[offset:datastart])
        offset = datastart + size
        fragments.append(bin[datastart:offset])
    return "".join(fragments)
507
507
def patch(a, bin):
    """Apply binary delta `bin` to text `a` and return the result.

    For a non-empty `a` this delegates to mpatch.patches().  For an empty
    `a` the result is util.buffer(bin, 12).
    NOTE(review): presumably a view of `bin` past its 12-byte header, which
    assumes a single-fragment delta -- confirm against util.buffer.
    """
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)
513
513
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return the bdiff matching blocks of `a` and `b` as a list of
    (start in a, start in b, length) triples."""
    triples = []
    for d in bdiff.blocks(a, b):
        astart, aend, bstart = d[0], d[1], d[2]
        triples.append((astart, bstart, aend - astart))
    return triples
517
517
def trivialdiffheader(length):
    """Return the 12-byte delta header (0, 0, length), packed as three
    big-endian signed 32-bit integers, or an empty string when `length` is
    zero (or otherwise false)."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
520
520
def replacediffheader(oldlen, newlen):
    """Return the 12-byte delta header (0, oldlen, newlen), packed as three
    big-endian signed 32-bit integers."""
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)
General Comments 0
You need to be logged in to leave comments. Login now