##// END OF EJS Templates
py3: slice on bytes instead of indexing...
Pulkit Goyal -
r35601:2f123f30 default
parent child Browse files
Show More
@@ -1,492 +1,492 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
# implementation modules, loaded through policy.importmod()
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')

# re-export the low-level primitives under this module's namespace
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
30
30
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline; only the last line may
    lack one (when text does not end with a newline). An empty text gives
    an empty list.
    '''
    lines = [l + '\n' for l in text.split('\n')]
    # split() always returns at least one piece, and the last piece is what
    # follows the final newline: drop it when it is empty, otherwise remove
    # the newline that was just appended to it.
    if lines[-1] == '\n':
        lines.pop()
    else:
        lines[-1] = lines[-1][:-1]
    return lines
40
40
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace changes at the end of lines
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # value taken by every option that is not passed (or passed as None)
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        '''set one attribute per key of `defaults`

        Keyword arguments that are not keys of `defaults` are ignored.
        Raises error.Abort when `context` cannot be converted to an integer.
        '''
        opts = pycompat.byteskwargs(opts)
        for k in self.defaults:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''return a new diffopts with the same options, updated by kwargs'''
        opts = dict((k, getattr(self, k)) for k in self.defaults)
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)
92
92
# shared options instance holding only the default values
defaultopts = diffopts()
94
94
def wsclean(opts, text, blank=True):
    """Return `text` normalized according to the whitespace options of `opts`.

    * ignorews: text is passed through bdiff.fixws(text, 1);
    * otherwise ignorewsamount: text is passed through bdiff.fixws(text, 0);
    * ignoreblanklines (only honoured when `blank` is true): runs of
      newlines are collapsed to one and leading/trailing newlines removed;
    * ignorewseol: spaces, tabs, carriage returns and form feeds found right
      before a newline are removed.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    if opts.ignorewseol:
        text = re.sub(r'[ \t\r\f]+\n', r'\n', text)
    return text
105
105
def splitblock(base1, lines1, base2, lines2, opts):
    """Split a block into matching sub-blocks and blank sub-blocks.

    The input lines match except for interwoven blank lines. Yield
    ``([a1, a2, b1, b2], btype)`` items, offset by `base1` / `base2`, where
    `btype` is '=' for a run of matching lines and '~' for a run of blank
    lines (a line is blank when wsclean() reduces it to an empty string).
    """
    # 1 for a line with content left after cleaning, 0 for a blank line
    lines1 = [1 if wsclean(opts, l) else 0 for l in lines1]
    lines2 = [1 if wsclean(opts, l) else 0 for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, '='
        if (i1 >= e1 or lines1[i1] == 0
            or i2 >= e2 or lines2[i2] == 0):
            # Consume the block of blank lines
            btype = '~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2
131
131
def hunkinrange(hunk, linerange):
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    The hunk is in range when ``lowerbound < start + length`` and
    ``start < upperbound``.

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    return lowerbound < start + length and start < upperbound
156
156
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. ``(a1, a2)`` parts of `blocks`.

    Raise error.Abort when no lower or upper bound could be computed for
    `rangea`, or when the upper bound ends up below the lower one.
    """
    lbb, ubb = rangeb
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == '=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            if b1 <= lbb < b2:
                # the lower bound falls inside this block
                if stype == '=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                # the upper bound falls inside this block
                if stype == '=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.Abort(_('line range exceeds file size'))
    return filteredblocks, (lba, uba)
195
195
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # the gap between the end of the previous match and the start of
        # this one
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
                new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
                if old == new:
                    btype = '~'
            yield s, btype
        yield s1, '='
236
236
def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    `a` and `b` are the old and new contents (None stands for /dev/null in
    the headers), `ad` and `bd` the dates put in the headers, `fn1` and
    `fn2` the file names. When either side is detected as binary and
    `opts.text` is not set, `headers` is empty and the single hunk has a
    None range.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        # without a date, a file name containing a space still gets a tab
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    def checknonewline(lines):
        # append the marker to any line lacking a trailing newline
        for text in lines:
            if text[-1:] != '\n':
                text += "\n\\ No newline at end of file\n"
            yield text

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = (None, ['Binary file %s has changed\n' % fn1]),
    elif not a:
        # everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
        hunks = (hunkrange, checknonewline(hunklines)),
    elif not b:
        # everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
        hunks = (hunkrange, checknonewline(hunklines)),
    else:
        diffhunks = _unidiff(a, b, opts=opts)
        # fetch the first hunk eagerly to find out whether the diff is empty
        try:
            hunkrange, hunklines = next(diffhunks)
        except StopIteration:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]
        def rewindhunks():
            # put back the hunk consumed above, then stream the others
            yield hunkrange, checknonewline(hunklines)
            for hr, hl in diffhunks:
                yield hr, checknonewline(hl)

        hunks = rewindhunks()

    return headerlines, hunks
321
321
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)
    def contextend(l, limit):
        # end of the trailing context, capped at `limit`
        ret = l + opts.context
        if ret > limit:
            ret = limit
        return ret

    def contextstart(l):
        # start of the leading context, never below 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [position where the last function-line search started, last result]
    lastfunc = [0, '']
    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                # slice rather than index: indexing bytes gives an int on py3
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [' ' + l1[x] for x in xrange(a2, aend)]
        )
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            # keep appending to the list owned by the joined hunk
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend(' ' + x for x in l1[astart:a1])
        delta.extend('-' + x for x in old)
        delta.extend('+' + x for x in new)

    if hunk:
        for x in yieldhunk(hunk):
            yield x
432
432
def b85diff(to, tn):
    '''return a base85-encoded binary diff from `to` to `tn`

    None is treated like empty content. The result is an empty string when
    both contents are equal; otherwise it is a 'GIT binary patch' section
    carrying the zlib-compressed new content as a 'literal' (no delta is
    computed).
    '''
    def fmtline(line):
        # the line length is encoded as one letter: 'A'-'Z' for 1 to 26,
        # then lowercase letters starting at 'a' for 27 and above
        l = len(line)
        if l <= 26:
            l = chr(ord('A') + l - 1)
        else:
            l = chr(l - 26 + ord('a') - 1)
        return '%c%s\n' % (l, util.b85encode(line, True))

    def chunk(text, csize=52):
        # yield successive pieces of at most csize items
        l = len(text)
        i = 0
        while i < l:
            yield text[i:i + csize]
            i += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    ret = []
    ret.append('GIT binary patch\n')
    ret.append('literal %d\n' % len(tn))
    for l in chunk(zlib.compress(tn)):
        ret.append(fmtline(l))
    ret.append('\n')

    return ''.join(ret)
467
467
def patchtext(bin):
    """Return the concatenation of the data carried by the fragments of `bin`.

    `bin` is read as a sequence of fragments, each made of a 12-byte header
    (three big-endian signed 32-bit integers) followed by as many bytes of
    data as the third integer says. The first two integers are not used.
    """
    pos = 0
    t = []
    while pos < len(bin):
        p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    return "".join(t)
477
477
def patch(a, bin):
    """Return the result of applying the delta `bin` to the text `a`.

    When `a` is empty, the result is `bin` past its first 12 bytes, as
    returned by util.buffer(); otherwise it is mpatch.patches(a, [bin]).
    """
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
483
483
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) as (a1, b1, a2 - a1) triples.

    Each (a1, a2, b1, b2) entry is converted to the start on the `a` side,
    the start on the `b` side and the length measured on the `a` side.
    """
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
487
487
def trivialdiffheader(length):
    """Return the packed 12-byte header (0, 0, length), or an empty string
    when `length` is zero.

    The three values are packed as big-endian signed 32-bit integers.
    """
    return struct.pack(">lll", 0, 0, length) if length else ''
490
490
def replacediffheader(oldlen, newlen):
    """Return the packed 12-byte header (0, oldlen, newlen).

    The three values are packed as big-endian signed 32-bit integers.
    """
    return struct.pack(">lll", 0, oldlen, newlen)
General Comments 0
You need to be logged in to leave comments. Login now