##// END OF EJS Templates
mdiff: use str.startswith/endswith() instead of slicing
Yuya Nishihara -
r35970:9e641c45 default
parent child Browse files
Show More
@@ -1,521 +1,521 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
22 _missing_newline_marker = "\\ No newline at end of file\n"
22 _missing_newline_marker = "\\ No newline at end of file\n"
23
23
24 bdiff = policy.importmod(r'bdiff')
24 bdiff = policy.importmod(r'bdiff')
25 mpatch = policy.importmod(r'mpatch')
25 mpatch = policy.importmod(r'mpatch')
26
26
27 blocks = bdiff.blocks
27 blocks = bdiff.blocks
28 fixws = bdiff.fixws
28 fixws = bdiff.fixws
29 patches = mpatch.patches
29 patches = mpatch.patches
30 patchedsize = mpatch.patchedsize
30 patchedsize = mpatch.patchedsize
31 textdiff = bdiff.bdiff
31 textdiff = bdiff.bdiff
32
32
33 def splitnewlines(text):
33 def splitnewlines(text):
34 '''like str.splitlines, but only split on newlines.'''
34 '''like str.splitlines, but only split on newlines.'''
35 lines = [l + '\n' for l in text.split('\n')]
35 lines = [l + '\n' for l in text.split('\n')]
36 if lines:
36 if lines:
37 if lines[-1] == '\n':
37 if lines[-1] == '\n':
38 lines.pop()
38 lines.pop()
39 else:
39 else:
40 lines[-1] = lines[-1][:-1]
40 lines[-1] = lines[-1][:-1]
41 return lines
41 return lines
42
42
43 class diffopts(object):
43 class diffopts(object):
44 '''context is the number of context lines
44 '''context is the number of context lines
45 text treats all files as text
45 text treats all files as text
46 showfunc enables diff -p output
46 showfunc enables diff -p output
47 git enables the git extended patch format
47 git enables the git extended patch format
48 nodates removes dates from diff headers
48 nodates removes dates from diff headers
49 nobinary ignores binary files
49 nobinary ignores binary files
50 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
50 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
51 ignorews ignores all whitespace changes in the diff
51 ignorews ignores all whitespace changes in the diff
52 ignorewsamount ignores changes in the amount of whitespace
52 ignorewsamount ignores changes in the amount of whitespace
53 ignoreblanklines ignores changes whose lines are all blank
53 ignoreblanklines ignores changes whose lines are all blank
54 upgrade generates git diffs to avoid data loss
54 upgrade generates git diffs to avoid data loss
55 '''
55 '''
56
56
57 defaults = {
57 defaults = {
58 'context': 3,
58 'context': 3,
59 'text': False,
59 'text': False,
60 'showfunc': False,
60 'showfunc': False,
61 'git': False,
61 'git': False,
62 'nodates': False,
62 'nodates': False,
63 'nobinary': False,
63 'nobinary': False,
64 'noprefix': False,
64 'noprefix': False,
65 'index': 0,
65 'index': 0,
66 'ignorews': False,
66 'ignorews': False,
67 'ignorewsamount': False,
67 'ignorewsamount': False,
68 'ignorewseol': False,
68 'ignorewseol': False,
69 'ignoreblanklines': False,
69 'ignoreblanklines': False,
70 'upgrade': False,
70 'upgrade': False,
71 'showsimilarity': False,
71 'showsimilarity': False,
72 'worddiff': False,
72 'worddiff': False,
73 }
73 }
74
74
75 def __init__(self, **opts):
75 def __init__(self, **opts):
76 opts = pycompat.byteskwargs(opts)
76 opts = pycompat.byteskwargs(opts)
77 for k in self.defaults.keys():
77 for k in self.defaults.keys():
78 v = opts.get(k)
78 v = opts.get(k)
79 if v is None:
79 if v is None:
80 v = self.defaults[k]
80 v = self.defaults[k]
81 setattr(self, k, v)
81 setattr(self, k, v)
82
82
83 try:
83 try:
84 self.context = int(self.context)
84 self.context = int(self.context)
85 except ValueError:
85 except ValueError:
86 raise error.Abort(_('diff context lines count must be '
86 raise error.Abort(_('diff context lines count must be '
87 'an integer, not %r') % self.context)
87 'an integer, not %r') % self.context)
88
88
89 def copy(self, **kwargs):
89 def copy(self, **kwargs):
90 opts = dict((k, getattr(self, k)) for k in self.defaults)
90 opts = dict((k, getattr(self, k)) for k in self.defaults)
91 opts = pycompat.strkwargs(opts)
91 opts = pycompat.strkwargs(opts)
92 opts.update(kwargs)
92 opts.update(kwargs)
93 return diffopts(**opts)
93 return diffopts(**opts)
94
94
95 defaultopts = diffopts()
95 defaultopts = diffopts()
96
96
97 def wsclean(opts, text, blank=True):
97 def wsclean(opts, text, blank=True):
98 if opts.ignorews:
98 if opts.ignorews:
99 text = bdiff.fixws(text, 1)
99 text = bdiff.fixws(text, 1)
100 elif opts.ignorewsamount:
100 elif opts.ignorewsamount:
101 text = bdiff.fixws(text, 0)
101 text = bdiff.fixws(text, 0)
102 if blank and opts.ignoreblanklines:
102 if blank and opts.ignoreblanklines:
103 text = re.sub('\n+', '\n', text).strip('\n')
103 text = re.sub('\n+', '\n', text).strip('\n')
104 if opts.ignorewseol:
104 if opts.ignorewseol:
105 text = re.sub(br'[ \t\r\f]+\n', r'\n', text)
105 text = re.sub(br'[ \t\r\f]+\n', r'\n', text)
106 return text
106 return text
107
107
108 def splitblock(base1, lines1, base2, lines2, opts):
108 def splitblock(base1, lines1, base2, lines2, opts):
109 # The input lines match except for interwoven blank lines. We
109 # The input lines match except for interwoven blank lines. We
110 # transform them into a sequence of matching blocks and blank blocks.
110 # transform them into a sequence of matching blocks and blank blocks.
111 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
111 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
112 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
112 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
113 s1, e1 = 0, len(lines1)
113 s1, e1 = 0, len(lines1)
114 s2, e2 = 0, len(lines2)
114 s2, e2 = 0, len(lines2)
115 while s1 < e1 or s2 < e2:
115 while s1 < e1 or s2 < e2:
116 i1, i2, btype = s1, s2, '='
116 i1, i2, btype = s1, s2, '='
117 if (i1 >= e1 or lines1[i1] == 0
117 if (i1 >= e1 or lines1[i1] == 0
118 or i2 >= e2 or lines2[i2] == 0):
118 or i2 >= e2 or lines2[i2] == 0):
119 # Consume the block of blank lines
119 # Consume the block of blank lines
120 btype = '~'
120 btype = '~'
121 while i1 < e1 and lines1[i1] == 0:
121 while i1 < e1 and lines1[i1] == 0:
122 i1 += 1
122 i1 += 1
123 while i2 < e2 and lines2[i2] == 0:
123 while i2 < e2 and lines2[i2] == 0:
124 i2 += 1
124 i2 += 1
125 else:
125 else:
126 # Consume the matching lines
126 # Consume the matching lines
127 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
127 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
128 i1 += 1
128 i1 += 1
129 i2 += 1
129 i2 += 1
130 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
130 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
131 s1 = i1
131 s1 = i1
132 s2 = i2
132 s2 = i2
133
133
134 def hunkinrange(hunk, linerange):
134 def hunkinrange(hunk, linerange):
135 """Return True if `hunk` defined as (start, length) is in `linerange`
135 """Return True if `hunk` defined as (start, length) is in `linerange`
136 defined as (lowerbound, upperbound).
136 defined as (lowerbound, upperbound).
137
137
138 >>> hunkinrange((5, 10), (2, 7))
138 >>> hunkinrange((5, 10), (2, 7))
139 True
139 True
140 >>> hunkinrange((5, 10), (6, 12))
140 >>> hunkinrange((5, 10), (6, 12))
141 True
141 True
142 >>> hunkinrange((5, 10), (13, 17))
142 >>> hunkinrange((5, 10), (13, 17))
143 True
143 True
144 >>> hunkinrange((5, 10), (3, 17))
144 >>> hunkinrange((5, 10), (3, 17))
145 True
145 True
146 >>> hunkinrange((5, 10), (1, 3))
146 >>> hunkinrange((5, 10), (1, 3))
147 False
147 False
148 >>> hunkinrange((5, 10), (18, 20))
148 >>> hunkinrange((5, 10), (18, 20))
149 False
149 False
150 >>> hunkinrange((5, 10), (1, 5))
150 >>> hunkinrange((5, 10), (1, 5))
151 False
151 False
152 >>> hunkinrange((5, 10), (15, 27))
152 >>> hunkinrange((5, 10), (15, 27))
153 False
153 False
154 """
154 """
155 start, length = hunk
155 start, length = hunk
156 lowerbound, upperbound = linerange
156 lowerbound, upperbound = linerange
157 return lowerbound < start + length and start < upperbound
157 return lowerbound < start + length and start < upperbound
158
158
159 def blocksinrange(blocks, rangeb):
159 def blocksinrange(blocks, rangeb):
160 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
160 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
161 `rangeb` from ``(b1, b2)`` point of view.
161 `rangeb` from ``(b1, b2)`` point of view.
162
162
163 Return `filteredblocks, rangea` where:
163 Return `filteredblocks, rangea` where:
164
164
165 * `filteredblocks` is a list of ``block = (a1, a2, b1, b2), stype`` items of
165 * `filteredblocks` is a list of ``block = (a1, a2, b1, b2), stype`` items of
166 `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
166 `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
167 block ``(b1, b2)`` being inside `rangeb` if
167 block ``(b1, b2)`` being inside `rangeb` if
168 ``rangeb[0] < b2 and b1 < rangeb[1]``;
168 ``rangeb[0] < b2 and b1 < rangeb[1]``;
169 * `rangea` is the line range w.r.t. the ``(a1, a2)`` parts of `blocks`.
169 * `rangea` is the line range w.r.t. the ``(a1, a2)`` parts of `blocks`.
170 """
170 """
171 lbb, ubb = rangeb
171 lbb, ubb = rangeb
172 lba, uba = None, None
172 lba, uba = None, None
173 filteredblocks = []
173 filteredblocks = []
174 for block in blocks:
174 for block in blocks:
175 (a1, a2, b1, b2), stype = block
175 (a1, a2, b1, b2), stype = block
176 if lbb >= b1 and ubb <= b2 and stype == '=':
176 if lbb >= b1 and ubb <= b2 and stype == '=':
177 # rangeb is within a single "=" hunk, restrict back linerange1
177 # rangeb is within a single "=" hunk, restrict back linerange1
178 # by offsetting rangeb
178 # by offsetting rangeb
179 lba = lbb - b1 + a1
179 lba = lbb - b1 + a1
180 uba = ubb - b1 + a1
180 uba = ubb - b1 + a1
181 else:
181 else:
182 if b1 <= lbb < b2:
182 if b1 <= lbb < b2:
183 if stype == '=':
183 if stype == '=':
184 lba = a2 - (b2 - lbb)
184 lba = a2 - (b2 - lbb)
185 else:
185 else:
186 lba = a1
186 lba = a1
187 if b1 < ubb <= b2:
187 if b1 < ubb <= b2:
188 if stype == '=':
188 if stype == '=':
189 uba = a1 + (ubb - b1)
189 uba = a1 + (ubb - b1)
190 else:
190 else:
191 uba = a2
191 uba = a2
192 if hunkinrange((b1, (b2 - b1)), rangeb):
192 if hunkinrange((b1, (b2 - b1)), rangeb):
193 filteredblocks.append(block)
193 filteredblocks.append(block)
194 if lba is None or uba is None or uba < lba:
194 if lba is None or uba is None or uba < lba:
195 raise error.Abort(_('line range exceeds file size'))
195 raise error.Abort(_('line range exceeds file size'))
196 return filteredblocks, (lba, uba)
196 return filteredblocks, (lba, uba)
197
197
198 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
198 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
199 """Return (block, type) tuples, where block is an mdiff.blocks
199 """Return (block, type) tuples, where block is an mdiff.blocks
200 line entry. type is '=' for blocks matching exactly one another
200 line entry. type is '=' for blocks matching exactly one another
201 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
201 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
202 matching only after having filtered blank lines.
202 matching only after having filtered blank lines.
203 lines1 and lines2 are text1 and text2 split with splitnewlines() if
203 lines1 and lines2 are text1 and text2 split with splitnewlines() if
204 they are already available.
204 they are already available.
205 """
205 """
206 if opts is None:
206 if opts is None:
207 opts = defaultopts
207 opts = defaultopts
208 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
208 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
209 text1 = wsclean(opts, text1, False)
209 text1 = wsclean(opts, text1, False)
210 text2 = wsclean(opts, text2, False)
210 text2 = wsclean(opts, text2, False)
211 diff = bdiff.blocks(text1, text2)
211 diff = bdiff.blocks(text1, text2)
212 for i, s1 in enumerate(diff):
212 for i, s1 in enumerate(diff):
213 # The first match is special.
213 # The first match is special.
214 # we've either found a match starting at line 0 or a match later
214 # we've either found a match starting at line 0 or a match later
215 # in the file. If it starts later, old and new below will both be
215 # in the file. If it starts later, old and new below will both be
216 # empty and we'll continue to the next match.
216 # empty and we'll continue to the next match.
217 if i > 0:
217 if i > 0:
218 s = diff[i - 1]
218 s = diff[i - 1]
219 else:
219 else:
220 s = [0, 0, 0, 0]
220 s = [0, 0, 0, 0]
221 s = [s[1], s1[0], s[3], s1[2]]
221 s = [s[1], s1[0], s[3], s1[2]]
222
222
223 # bdiff sometimes gives huge matches past eof, this check eats them,
223 # bdiff sometimes gives huge matches past eof, this check eats them,
224 # and deals with the special first match case described above
224 # and deals with the special first match case described above
225 if s[0] != s[1] or s[2] != s[3]:
225 if s[0] != s[1] or s[2] != s[3]:
226 type = '!'
226 type = '!'
227 if opts.ignoreblanklines:
227 if opts.ignoreblanklines:
228 if lines1 is None:
228 if lines1 is None:
229 lines1 = splitnewlines(text1)
229 lines1 = splitnewlines(text1)
230 if lines2 is None:
230 if lines2 is None:
231 lines2 = splitnewlines(text2)
231 lines2 = splitnewlines(text2)
232 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
232 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
233 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
233 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
234 if old == new:
234 if old == new:
235 type = '~'
235 type = '~'
236 yield s, type
236 yield s, type
237 yield s1, '='
237 yield s1, '='
238
238
239 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
239 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
240 """Return a unified diff as a (headers, hunks) tuple.
240 """Return a unified diff as a (headers, hunks) tuple.
241
241
242 If the diff is not null, `headers` is a list with unified diff header
242 If the diff is not null, `headers` is a list with unified diff header
243 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
243 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
244 (hunkrange, hunklines) coming from _unidiff().
244 (hunkrange, hunklines) coming from _unidiff().
245 Otherwise, `headers` and `hunks` are empty.
245 Otherwise, `headers` and `hunks` are empty.
246
246
247 Set binary=True if either a or b should be taken as a binary file.
247 Set binary=True if either a or b should be taken as a binary file.
248 """
248 """
249 def datetag(date, fn=None):
249 def datetag(date, fn=None):
250 if not opts.git and not opts.nodates:
250 if not opts.git and not opts.nodates:
251 return '\t%s' % date
251 return '\t%s' % date
252 if fn and ' ' in fn:
252 if fn and ' ' in fn:
253 return '\t'
253 return '\t'
254 return ''
254 return ''
255
255
256 sentinel = [], ()
256 sentinel = [], ()
257 if not a and not b:
257 if not a and not b:
258 return sentinel
258 return sentinel
259
259
260 if opts.noprefix:
260 if opts.noprefix:
261 aprefix = bprefix = ''
261 aprefix = bprefix = ''
262 else:
262 else:
263 aprefix = 'a/'
263 aprefix = 'a/'
264 bprefix = 'b/'
264 bprefix = 'b/'
265
265
266 epoch = util.datestr((0, 0))
266 epoch = util.datestr((0, 0))
267
267
268 fn1 = util.pconvert(fn1)
268 fn1 = util.pconvert(fn1)
269 fn2 = util.pconvert(fn2)
269 fn2 = util.pconvert(fn2)
270
270
271 if binary:
271 if binary:
272 if a and b and len(a) == len(b) and a == b:
272 if a and b and len(a) == len(b) and a == b:
273 return sentinel
273 return sentinel
274 headerlines = []
274 headerlines = []
275 hunks = (None, ['Binary file %s has changed\n' % fn1]),
275 hunks = (None, ['Binary file %s has changed\n' % fn1]),
276 elif not a:
276 elif not a:
277 without_newline = b[-1:] != '\n'
277 without_newline = not b.endswith('\n')
278 b = splitnewlines(b)
278 b = splitnewlines(b)
279 if a is None:
279 if a is None:
280 l1 = '--- /dev/null%s' % datetag(epoch)
280 l1 = '--- /dev/null%s' % datetag(epoch)
281 else:
281 else:
282 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
282 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
283 l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
283 l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
284 headerlines = [l1, l2]
284 headerlines = [l1, l2]
285 size = len(b)
285 size = len(b)
286 hunkrange = (0, 0, 1, size)
286 hunkrange = (0, 0, 1, size)
287 hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
287 hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
288 if without_newline:
288 if without_newline:
289 hunklines[-1] += '\n'
289 hunklines[-1] += '\n'
290 hunklines.append(_missing_newline_marker)
290 hunklines.append(_missing_newline_marker)
291 hunks = (hunkrange, hunklines),
291 hunks = (hunkrange, hunklines),
292 elif not b:
292 elif not b:
293 without_newline = a[-1:] != '\n'
293 without_newline = not a.endswith('\n')
294 a = splitnewlines(a)
294 a = splitnewlines(a)
295 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
295 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
296 if b is None:
296 if b is None:
297 l2 = '+++ /dev/null%s' % datetag(epoch)
297 l2 = '+++ /dev/null%s' % datetag(epoch)
298 else:
298 else:
299 l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
299 l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
300 headerlines = [l1, l2]
300 headerlines = [l1, l2]
301 size = len(a)
301 size = len(a)
302 hunkrange = (1, size, 0, 0)
302 hunkrange = (1, size, 0, 0)
303 hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
303 hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
304 if without_newline:
304 if without_newline:
305 hunklines[-1] += '\n'
305 hunklines[-1] += '\n'
306 hunklines.append(_missing_newline_marker)
306 hunklines.append(_missing_newline_marker)
307 hunks = (hunkrange, hunklines),
307 hunks = (hunkrange, hunklines),
308 else:
308 else:
309 hunks = _unidiff(a, b, opts=opts)
309 hunks = _unidiff(a, b, opts=opts)
310 if not next(hunks):
310 if not next(hunks):
311 return sentinel
311 return sentinel
312
312
313 headerlines = [
313 headerlines = [
314 "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
314 "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
315 "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
315 "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
316 ]
316 ]
317
317
318 return headerlines, hunks
318 return headerlines, hunks
319
319
320 def _unidiff(t1, t2, opts=defaultopts):
320 def _unidiff(t1, t2, opts=defaultopts):
321 """Yield hunks of a headerless unified diff from t1 and t2 texts.
321 """Yield hunks of a headerless unified diff from t1 and t2 texts.
322
322
323 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
323 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
324 tuple (s1, l1, s2, l2) representing the range information of the hunk to
324 tuple (s1, l1, s2, l2) representing the range information of the hunk to
325 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
325 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
326 of the hunk combining said header followed by line additions and
326 of the hunk combining said header followed by line additions and
327 deletions.
327 deletions.
328
328
329 The first item yielded is a bool telling whether any hunks follow.
329 The first item yielded is a bool telling whether any hunks follow.
330 """
330 """
331 l1 = splitnewlines(t1)
331 l1 = splitnewlines(t1)
332 l2 = splitnewlines(t2)
332 l2 = splitnewlines(t2)
333 def contextend(l, len):
333 def contextend(l, len):
334 ret = l + opts.context
334 ret = l + opts.context
335 if ret > len:
335 if ret > len:
336 ret = len
336 ret = len
337 return ret
337 return ret
338
338
339 def contextstart(l):
339 def contextstart(l):
340 ret = l - opts.context
340 ret = l - opts.context
341 if ret < 0:
341 if ret < 0:
342 return 0
342 return 0
343 return ret
343 return ret
344
344
345 lastfunc = [0, '']
345 lastfunc = [0, '']
346 def yieldhunk(hunk):
346 def yieldhunk(hunk):
347 (astart, a2, bstart, b2, delta) = hunk
347 (astart, a2, bstart, b2, delta) = hunk
348 aend = contextend(a2, len(l1))
348 aend = contextend(a2, len(l1))
349 alen = aend - astart
349 alen = aend - astart
350 blen = b2 - bstart + aend - a2
350 blen = b2 - bstart + aend - a2
351
351
352 func = ""
352 func = ""
353 if opts.showfunc:
353 if opts.showfunc:
354 lastpos, func = lastfunc
354 lastpos, func = lastfunc
355 # walk backwards from the start of the context up to the start of
355 # walk backwards from the start of the context up to the start of
356 # the previous hunk context until we find a line starting with an
356 # the previous hunk context until we find a line starting with an
357 # alphanumeric char.
357 # alphanumeric char.
358 for i in xrange(astart - 1, lastpos - 1, -1):
358 for i in xrange(astart - 1, lastpos - 1, -1):
359 if l1[i][0:1].isalnum():
359 if l1[i][0:1].isalnum():
360 func = ' ' + l1[i].rstrip()[:40]
360 func = ' ' + l1[i].rstrip()[:40]
361 lastfunc[1] = func
361 lastfunc[1] = func
362 break
362 break
363 # by recording this hunk's starting point as the next place to
363 # by recording this hunk's starting point as the next place to
364 # start looking for function lines, we avoid reading any line in
364 # start looking for function lines, we avoid reading any line in
365 # the file more than once.
365 # the file more than once.
366 lastfunc[0] = astart
366 lastfunc[0] = astart
367
367
368 # zero-length hunk ranges report their start line as one less
368 # zero-length hunk ranges report their start line as one less
369 if alen:
369 if alen:
370 astart += 1
370 astart += 1
371 if blen:
371 if blen:
372 bstart += 1
372 bstart += 1
373
373
374 hunkrange = astart, alen, bstart, blen
374 hunkrange = astart, alen, bstart, blen
375 hunklines = (
375 hunklines = (
376 ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
376 ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
377 + delta
377 + delta
378 + [' ' + l1[x] for x in xrange(a2, aend)]
378 + [' ' + l1[x] for x in xrange(a2, aend)]
379 )
379 )
380 # If either file ends without a newline and the last line of
380 # If either file ends without a newline and the last line of
381 # that file is part of a hunk, a marker is printed. If the
381 # that file is part of a hunk, a marker is printed. If the
382 # last line of both files is identical and neither ends in
382 # last line of both files is identical and neither ends in
383 # a newline, print only one marker. That's the only case in
383 # a newline, print only one marker. That's the only case in
384 # which the hunk can end in a shared line without a newline.
384 # which the hunk can end in a shared line without a newline.
385 skip = False
385 skip = False
386 if t1[-1:] != '\n' and astart + alen == len(l1) + 1:
386 if not t1.endswith('\n') and astart + alen == len(l1) + 1:
387 for i in xrange(len(hunklines) - 1, -1, -1):
387 for i in xrange(len(hunklines) - 1, -1, -1):
388 if hunklines[i][0:1] in ('-', ' '):
388 if hunklines[i].startswith(('-', ' ')):
389 if hunklines[i][0:1] == ' ':
389 if hunklines[i].startswith(' '):
390 skip = True
390 skip = True
391 hunklines[i] += '\n'
391 hunklines[i] += '\n'
392 hunklines.insert(i + 1, _missing_newline_marker)
392 hunklines.insert(i + 1, _missing_newline_marker)
393 break
393 break
394 if not skip and t2[-1:] != '\n' and bstart + blen == len(l2) + 1:
394 if not skip and not t2.endswith('\n') and bstart + blen == len(l2) + 1:
395 for i in xrange(len(hunklines) - 1, -1, -1):
395 for i in xrange(len(hunklines) - 1, -1, -1):
396 if hunklines[i][0:1] == '+':
396 if hunklines[i].startswith('+'):
397 hunklines[i] += '\n'
397 hunklines[i] += '\n'
398 hunklines.insert(i + 1, _missing_newline_marker)
398 hunklines.insert(i + 1, _missing_newline_marker)
399 break
399 break
400 yield hunkrange, hunklines
400 yield hunkrange, hunklines
401
401
402 # bdiff.blocks gives us the matching sequences in the files. The loop
402 # bdiff.blocks gives us the matching sequences in the files. The loop
403 # below finds the spaces between those matching sequences and translates
403 # below finds the spaces between those matching sequences and translates
404 # them into diff output.
404 # them into diff output.
405 #
405 #
406 hunk = None
406 hunk = None
407 ignoredlines = 0
407 ignoredlines = 0
408 has_hunks = False
408 has_hunks = False
409 for s, stype in allblocks(t1, t2, opts, l1, l2):
409 for s, stype in allblocks(t1, t2, opts, l1, l2):
410 a1, a2, b1, b2 = s
410 a1, a2, b1, b2 = s
411 if stype != '!':
411 if stype != '!':
412 if stype == '~':
412 if stype == '~':
413 # The diff context lines are based on t1 content. When
413 # The diff context lines are based on t1 content. When
414 # blank lines are ignored, the new lines offsets must
414 # blank lines are ignored, the new lines offsets must
415 # be adjusted as if equivalent blocks ('~') had the
415 # be adjusted as if equivalent blocks ('~') had the
416 # same sizes on both sides.
416 # same sizes on both sides.
417 ignoredlines += (b2 - b1) - (a2 - a1)
417 ignoredlines += (b2 - b1) - (a2 - a1)
418 continue
418 continue
419 delta = []
419 delta = []
420 old = l1[a1:a2]
420 old = l1[a1:a2]
421 new = l2[b1:b2]
421 new = l2[b1:b2]
422
422
423 b1 -= ignoredlines
423 b1 -= ignoredlines
424 b2 -= ignoredlines
424 b2 -= ignoredlines
425 astart = contextstart(a1)
425 astart = contextstart(a1)
426 bstart = contextstart(b1)
426 bstart = contextstart(b1)
427 prev = None
427 prev = None
428 if hunk:
428 if hunk:
429 # join with the previous hunk if it falls inside the context
429 # join with the previous hunk if it falls inside the context
430 if astart < hunk[1] + opts.context + 1:
430 if astart < hunk[1] + opts.context + 1:
431 prev = hunk
431 prev = hunk
432 astart = hunk[1]
432 astart = hunk[1]
433 bstart = hunk[3]
433 bstart = hunk[3]
434 else:
434 else:
435 if not has_hunks:
435 if not has_hunks:
436 has_hunks = True
436 has_hunks = True
437 yield True
437 yield True
438 for x in yieldhunk(hunk):
438 for x in yieldhunk(hunk):
439 yield x
439 yield x
440 if prev:
440 if prev:
441 # we've joined the previous hunk, record the new ending points.
441 # we've joined the previous hunk, record the new ending points.
442 hunk[1] = a2
442 hunk[1] = a2
443 hunk[3] = b2
443 hunk[3] = b2
444 delta = hunk[4]
444 delta = hunk[4]
445 else:
445 else:
446 # create a new hunk
446 # create a new hunk
447 hunk = [astart, a2, bstart, b2, delta]
447 hunk = [astart, a2, bstart, b2, delta]
448
448
449 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
449 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
450 delta[len(delta):] = ['-' + x for x in old]
450 delta[len(delta):] = ['-' + x for x in old]
451 delta[len(delta):] = ['+' + x for x in new]
451 delta[len(delta):] = ['+' + x for x in new]
452
452
453 if hunk:
453 if hunk:
454 if not has_hunks:
454 if not has_hunks:
455 has_hunks = True
455 has_hunks = True
456 yield True
456 yield True
457 for x in yieldhunk(hunk):
457 for x in yieldhunk(hunk):
458 yield x
458 yield x
459 elif not has_hunks:
459 elif not has_hunks:
460 yield False
460 yield False
461
461
462 def b85diff(to, tn):
462 def b85diff(to, tn):
463 '''print base85-encoded binary diff'''
463 '''print base85-encoded binary diff'''
464 def fmtline(line):
464 def fmtline(line):
465 l = len(line)
465 l = len(line)
466 if l <= 26:
466 if l <= 26:
467 l = pycompat.bytechr(ord('A') + l - 1)
467 l = pycompat.bytechr(ord('A') + l - 1)
468 else:
468 else:
469 l = pycompat.bytechr(l - 26 + ord('a') - 1)
469 l = pycompat.bytechr(l - 26 + ord('a') - 1)
470 return '%c%s\n' % (l, util.b85encode(line, True))
470 return '%c%s\n' % (l, util.b85encode(line, True))
471
471
472 def chunk(text, csize=52):
472 def chunk(text, csize=52):
473 l = len(text)
473 l = len(text)
474 i = 0
474 i = 0
475 while i < l:
475 while i < l:
476 yield text[i:i + csize]
476 yield text[i:i + csize]
477 i += csize
477 i += csize
478
478
479 if to is None:
479 if to is None:
480 to = ''
480 to = ''
481 if tn is None:
481 if tn is None:
482 tn = ''
482 tn = ''
483
483
484 if to == tn:
484 if to == tn:
485 return ''
485 return ''
486
486
487 # TODO: deltas
487 # TODO: deltas
488 ret = []
488 ret = []
489 ret.append('GIT binary patch\n')
489 ret.append('GIT binary patch\n')
490 ret.append('literal %d\n' % len(tn))
490 ret.append('literal %d\n' % len(tn))
491 for l in chunk(zlib.compress(tn)):
491 for l in chunk(zlib.compress(tn)):
492 ret.append(fmtline(l))
492 ret.append(fmtline(l))
493 ret.append('\n')
493 ret.append('\n')
494
494
495 return ''.join(ret)
495 return ''.join(ret)
496
496
497 def patchtext(bin):
497 def patchtext(bin):
498 pos = 0
498 pos = 0
499 t = []
499 t = []
500 while pos < len(bin):
500 while pos < len(bin):
501 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
501 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
502 pos += 12
502 pos += 12
503 t.append(bin[pos:pos + l])
503 t.append(bin[pos:pos + l])
504 pos += l
504 pos += l
505 return "".join(t)
505 return "".join(t)
506
506
507 def patch(a, bin):
507 def patch(a, bin):
508 if len(a) == 0:
508 if len(a) == 0:
509 # skip over trivial delta header
509 # skip over trivial delta header
510 return util.buffer(bin, 12)
510 return util.buffer(bin, 12)
511 return mpatch.patches(a, [bin])
511 return mpatch.patches(a, [bin])
512
512
513 # similar to difflib.SequenceMatcher.get_matching_blocks
513 # similar to difflib.SequenceMatcher.get_matching_blocks
514 def get_matching_blocks(a, b):
514 def get_matching_blocks(a, b):
515 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
515 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
516
516
517 def trivialdiffheader(length):
517 def trivialdiffheader(length):
518 return struct.pack(">lll", 0, 0, length) if length else ''
518 return struct.pack(">lll", 0, 0, length) if length else ''
519
519
520 def replacediffheader(oldlen, newlen):
520 def replacediffheader(oldlen, newlen):
521 return struct.pack(">lll", 0, oldlen, newlen)
521 return struct.pack(">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now