##// END OF EJS Templates
bdiff: write a native version of splitnewlines...
Augie Fackler -
r36219:29dd37a4 default
parent child Browse files
Show More
@@ -1,213 +1,263 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #define PY_SSIZE_T_CLEAN
13 13 #include <Python.h>
14 14 #include <limits.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bdiff.h"
19 19 #include "bitmanipulation.h"
20 20 #include "util.h"
21 21
22 22 static PyObject *blocks(PyObject *self, PyObject *args)
23 23 {
24 24 PyObject *sa, *sb, *rl = NULL, *m;
25 25 struct bdiff_line *a, *b;
26 26 struct bdiff_hunk l, *h;
27 27 int an, bn, count, pos = 0;
28 28
29 29 l.next = NULL;
30 30
31 31 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 32 return NULL;
33 33
34 34 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 35 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 36
37 37 if (!a || !b)
38 38 goto nomem;
39 39
40 40 count = bdiff_diff(a, an, b, bn, &l);
41 41 if (count < 0)
42 42 goto nomem;
43 43
44 44 rl = PyList_New(count);
45 45 if (!rl)
46 46 goto nomem;
47 47
48 48 for (h = l.next; h; h = h->next) {
49 49 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 50 PyList_SetItem(rl, pos, m);
51 51 pos++;
52 52 }
53 53
54 54 nomem:
55 55 free(a);
56 56 free(b);
57 57 bdiff_freehunks(l.next);
58 58 return rl ? rl : PyErr_NoMemory();
59 59 }
60 60
61 61 static PyObject *bdiff(PyObject *self, PyObject *args)
62 62 {
63 63 char *sa, *sb, *rb, *ia, *ib;
64 64 PyObject *result = NULL;
65 65 struct bdiff_line *al, *bl;
66 66 struct bdiff_hunk l, *h;
67 67 int an, bn, count;
68 68 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
69 69 PyThreadState *_save;
70 70
71 71 l.next = NULL;
72 72
73 73 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
74 74 return NULL;
75 75
76 76 if (la > UINT_MAX || lb > UINT_MAX) {
77 77 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
78 78 return NULL;
79 79 }
80 80
81 81 _save = PyEval_SaveThread();
82 82
83 83 lmax = la > lb ? lb : la;
84 84 for (ia = sa, ib = sb; li < lmax && *ia == *ib; ++li, ++ia, ++ib)
85 85 if (*ia == '\n')
86 86 lcommon = li + 1;
87 87 /* we can almost add: if (li == lmax) lcommon = li; */
88 88
89 89 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
90 90 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
91 91 if (!al || !bl)
92 92 goto nomem;
93 93
94 94 count = bdiff_diff(al, an, bl, bn, &l);
95 95 if (count < 0)
96 96 goto nomem;
97 97
98 98 /* calculate length of output */
99 99 la = lb = 0;
100 100 for (h = l.next; h; h = h->next) {
101 101 if (h->a1 != la || h->b1 != lb)
102 102 len += 12 + bl[h->b1].l - bl[lb].l;
103 103 la = h->a2;
104 104 lb = h->b2;
105 105 }
106 106 PyEval_RestoreThread(_save);
107 107 _save = NULL;
108 108
109 109 result = PyBytes_FromStringAndSize(NULL, len);
110 110
111 111 if (!result)
112 112 goto nomem;
113 113
114 114 /* build binary patch */
115 115 rb = PyBytes_AsString(result);
116 116 la = lb = 0;
117 117
118 118 for (h = l.next; h; h = h->next) {
119 119 if (h->a1 != la || h->b1 != lb) {
120 120 len = bl[h->b1].l - bl[lb].l;
121 121 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
122 122 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
123 123 rb + 4);
124 124 putbe32((uint32_t)len, rb + 8);
125 125 memcpy(rb + 12, bl[lb].l, len);
126 126 rb += 12 + len;
127 127 }
128 128 la = h->a2;
129 129 lb = h->b2;
130 130 }
131 131
132 132 nomem:
133 133 if (_save)
134 134 PyEval_RestoreThread(_save);
135 135 free(al);
136 136 free(bl);
137 137 bdiff_freehunks(l.next);
138 138 return result ? result : PyErr_NoMemory();
139 139 }
140 140
141 141 /*
142 142 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
143 143 * reduce whitespace sequences to a single space and trim remaining whitespace
144 144 * from end of lines.
145 145 */
146 146 static PyObject *fixws(PyObject *self, PyObject *args)
147 147 {
148 148 PyObject *s, *result = NULL;
149 149 char allws, c;
150 150 const char *r;
151 151 Py_ssize_t i, rlen, wlen = 0;
152 152 char *w;
153 153
154 154 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
155 155 return NULL;
156 156 r = PyBytes_AsString(s);
157 157 rlen = PyBytes_Size(s);
158 158
159 159 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
160 160 if (!w)
161 161 goto nomem;
162 162
163 163 for (i = 0; i != rlen; i++) {
164 164 c = r[i];
165 165 if (c == ' ' || c == '\t' || c == '\r') {
166 166 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
167 167 w[wlen++] = ' ';
168 168 } else if (c == '\n' && !allws && wlen > 0 &&
169 169 w[wlen - 1] == ' ') {
170 170 w[wlen - 1] = '\n';
171 171 } else {
172 172 w[wlen++] = c;
173 173 }
174 174 }
175 175
176 176 result = PyBytes_FromStringAndSize(w, wlen);
177 177
178 178 nomem:
179 179 PyMem_Free(w);
180 180 return result ? result : PyErr_NoMemory();
181 181 }
182 182
183 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
184 const char *source, Py_ssize_t len)
185 {
186 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
187 if (sliced == NULL)
188 return false;
189 PyList_SET_ITEM(list, destidx, sliced);
190 return true;
191 }
192
193 static PyObject *splitnewlines(PyObject *self, PyObject *args)
194 {
195 const char *text;
196 Py_ssize_t nelts = 0, size, i, start = 0;
197 PyObject *result = NULL;
198
199 if (!PyArg_ParseTuple(args, "s#", &text, &size)) {
200 goto abort;
201 }
202 if (!size) {
203 return PyList_New(0);
204 }
205 /* This loops to size-1 because if the last byte is a newline,
206 * we don't want to perform a split there. */
207 for (i = 0; i < size - 1; ++i) {
208 if (text[i] == '\n') {
209 ++nelts;
210 }
211 }
212 if ((result = PyList_New(nelts + 1)) == NULL)
213 goto abort;
214 nelts = 0;
215 for (i = 0; i < size - 1; ++i) {
216 if (text[i] == '\n') {
217 if (!sliceintolist(result, nelts++, text + start,
218 i - start + 1))
219 goto abort;
220 start = i + 1;
221 }
222 }
223 if (!sliceintolist(result, nelts++, text + start, size - start))
224 goto abort;
225 return result;
226 abort:
227 Py_XDECREF(result);
228 return NULL;
229 }
230
183 231 static char mdiff_doc[] = "Efficient binary diff.";
184 232
185 233 static PyMethodDef methods[] = {
186 234 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
187 235 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
188 236 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
237 {"splitnewlines", splitnewlines, METH_VARARGS,
238 "like str.splitlines, but only split on newlines\n"},
189 239 {NULL, NULL},
190 240 };
191 241
192 static const int version = 1;
242 static const int version = 2;
193 243
194 244 #ifdef IS_PY3K
195 245 static struct PyModuleDef bdiff_module = {
196 246 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
197 247 };
198 248
199 249 PyMODINIT_FUNC PyInit_bdiff(void)
200 250 {
201 251 PyObject *m;
202 252 m = PyModule_Create(&bdiff_module);
203 253 PyModule_AddIntConstant(m, "version", version);
204 254 return m;
205 255 }
206 256 #else
207 257 PyMODINIT_FUNC initbdiff(void)
208 258 {
209 259 PyObject *m;
210 260 m = Py_InitModule3("bdiff", methods, mdiff_doc);
211 261 PyModule_AddIntConstant(m, "version", version);
212 262 }
213 263 #endif
@@ -1,521 +1,512 b''
1 1 # mdiff.py - diff and patch routines for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12 import zlib
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 policy,
18 18 pycompat,
19 19 util,
20 20 )
21 21
22 22 _missing_newline_marker = "\\ No newline at end of file\n"
23 23
24 24 bdiff = policy.importmod(r'bdiff')
25 25 mpatch = policy.importmod(r'mpatch')
26 26
27 27 blocks = bdiff.blocks
28 28 fixws = bdiff.fixws
29 29 patches = mpatch.patches
30 30 patchedsize = mpatch.patchedsize
31 31 textdiff = bdiff.bdiff
32
33 def splitnewlines(text):
34 '''like str.splitlines, but only split on newlines.'''
35 lines = [l + '\n' for l in text.split('\n')]
36 if lines:
37 if lines[-1] == '\n':
38 lines.pop()
39 else:
40 lines[-1] = lines[-1][:-1]
41 return lines
32 splitnewlines = bdiff.splitnewlines
42 33
class diffopts(object):
    '''Container for diff options.

    Every key of ``defaults`` becomes an attribute; a keyword argument that
    is missing or None falls back to the default value.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignorewseol': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
        'worddiff': False,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            value = opts.get(name)
            setattr(self, name, fallback if value is None else value)

        # context may arrive as a string; normalise it to an int
        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the current values, overridden by
        kwargs.'''
        current = {name: getattr(self, name) for name in self.defaults}
        current = pycompat.strkwargs(current)
        current.update(kwargs)
        return diffopts(**current)
94 85
95 86 defaultopts = diffopts()
96 87
def wsclean(opts, text, blank=True):
    """Normalise whitespace in ``text`` according to ``opts``.

    * ``opts.ignorews``: strip all whitespace via bdiff.fixws(text, 1);
    * else ``opts.ignorewsamount``: collapse whitespace runs via
      bdiff.fixws(text, 0);
    * ``opts.ignoreblanklines`` (only when ``blank`` is true): squeeze runs
      of newlines to one and strip leading/trailing newlines;
    * ``opts.ignorewseol``: drop spaces, tabs, CR and FF before a newline.

    Returns the cleaned text.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    if opts.ignorewseol:
        # pattern and replacement must have the same type: a bytes pattern
        # with a str replacement raises TypeError on Python 3
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text
107 98
def splitblock(base1, lines1, base2, lines2, opts):
    """Split a block that differs only by blank lines into sub-blocks.

    Yields ``([a1, a2, b1, b2], btype)`` where the indices are offset by
    base1/base2 and btype is '=' for a run of matching non-blank lines or
    '~' for a run of blank lines.
    """
    # The input lines match except for interwoven blank lines.  Reduce each
    # line to a flag: 1 if anything is left after wsclean(), 0 if blank.
    flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
    flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
    e1 = len(flags1)
    e2 = len(flags2)
    s1 = s2 = 0
    while s1 < e1 or s2 < e2:
        i1, i2 = s1, s2
        if (i1 >= e1 or flags1[i1] == 0
            or i2 >= e2 or flags2[i2] == 0):
            # consume the block of blank lines on both sides
            btype = '~'
            while i1 < e1 and flags1[i1] == 0:
                i1 += 1
            while i2 < e2 and flags2[i2] == 0:
                i2 += 1
        else:
            # consume the matching non-blank lines in lockstep
            btype = '='
            while i1 < e1 and flags1[i1] == 1 and flags2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1, s2 = i1, i2
133 124
def hunkinrange(hunk, linerange):
    """Tell whether `hunk`, given as (start, length), overlaps `linerange`,
    given as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    hunkstart, hunklength = hunk
    lowerbound, upperbound = linerange
    hunkend = hunkstart + hunklength
    # two half-open intervals overlap when each starts before the other ends
    return lowerbound < hunkend and hunkstart < upperbound
158 149
def blocksinrange(blocks, rangeb):
    """Keep the `blocks` that touch line range `rangeb` on the "b" side.

    `blocks` holds ``((a1, a2, b1, b2), stype)`` items.  Returns
    ``(filteredblocks, rangea)``:

    * `filteredblocks` lists the items whose ``(b1, b2)`` span satisfies
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is `rangeb` translated to the "a" side of the blocks.

    Raises error.Abort when no "a" range can be derived.
    """
    lowb, highb = rangeb
    lowa = higha = None
    kept = []
    for item in blocks:
        (a1, a2, b1, b2), stype = item
        if lowb >= b1 and highb <= b2 and stype == '=':
            # rangeb lies inside a single '=' block: shift it over to the
            # "a" side by the block offset
            lowa = lowb - b1 + a1
            higha = highb - b1 + a1
        else:
            if b1 <= lowb < b2:
                lowa = a2 - (b2 - lowb) if stype == '=' else a1
            if b1 < highb <= b2:
                higha = a1 + (highb - b1) if stype == '=' else a2
        if hunkinrange((b1, (b2 - b1)), rangeb):
            kept.append(item)
    if lowa is None or higha is None or higha < lowa:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowa, higha)
197 188
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) pairs covering both texts.

    block is a [a1, a2, b1, b2] line range as produced by bdiff.blocks().
    type is '=' for a matching block, '!' for a differing block and '~' for
    a block that matches once blank lines have been filtered out.

    lines1 and lines2 may carry text1 and text2 already split with
    splitnewlines(); they are computed on demand otherwise.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # Pretend an empty match precedes the first real one, so the gap before
    # a match that starts at line 0 comes out empty and is skipped below.
    previous = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof; empty gaps are
        # dropped here
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield match, '='
238 229
def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
    """Build a unified diff and return it as a (headers, hunks) pair.

    For a non-empty diff, `headers` lists the "--- <original>" and
    "+++ <new>" lines and `hunks` yields (hunkrange, hunklines) items as
    produced by _unidiff().  For an empty diff both are empty.

    Pass binary=True if either a or b should be taken as a binary file.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix, bprefix = 'a/', 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        return [], ((None, ['Binary file %s has changed\n' % fn1]),)

    if not a:
        # everything in b is an addition
        nonewline = not b.endswith('\n')
        added = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        size = len(added)
        hunklines = ["@@ -0,0 +1,%d @@\n" % size]
        hunklines.extend("+" + e for e in added)
        if nonewline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        return [l1, l2], (((0, 0, 1, size), hunklines),)

    if not b:
        # everything in a is a removal
        nonewline = not a.endswith('\n')
        removed = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        size = len(removed)
        hunklines = ["@@ -1,%d +0,0 @@\n" % size]
        hunklines.extend("-" + e for e in removed)
        if nonewline:
            hunklines[-1] += '\n'
            hunklines.append(_missing_newline_marker)
        return [l1, l2], (((1, size, 0, 0), hunklines),)

    hunks = _unidiff(a, b, opts=opts)
    # the first item of the generator is a bool: are there any hunks?
    if not next(hunks):
        return sentinel

    headerlines = [
        "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
        "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
    ]
    return headerlines, hunks
319 310
def _unidiff(t1, t2, opts=defaultopts):
    """Yield the hunks of a headerless unified diff between t1 and t2.

    The first item yielded is a bool telling whether there is any hunk.
    Every following item is a (hunkrange, hunklines) tuple: `hunkrange` is
    the (s1, l1, s2, l2) tuple behind the '@@ -s1,l1 +s2,l2 @@' header and
    `hunklines` is that header followed by the context, removed and added
    lines of the hunk.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(l, len):
        # last context line, clamped to the end of the file
        return min(l + opts.context, len)

    def contextstart(l):
        # first context line, clamped to the start of the file
        return max(l - opts.context, 0)

    # [position to stop the backwards search at, last function line found]
    lastfunc = [0, '']

    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start
            # of the previous hunk context until a line starting with an
            # alphanumeric char is found
            for idx in xrange(astart - 1, lastpos - 1, -1):
                if l1[idx][:1].isalnum():
                    func = ' ' + l1[idx].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # remember where this hunk started so that no line of the file
            # is inspected more than once
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        header = "@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))
        trailing = [' ' + l1[x] for x in xrange(a2, aend)]
        hunklines = [header] + delta + trailing

        # If either file ends without a newline and the last line of that
        # file is part of a hunk, a marker is printed.  If the last line of
        # both files is identical and neither ends in a newline, print only
        # one marker.  That's the only case in which the hunk can end in a
        # shared line without a newline.
        skip = False
        if not t1.endswith('\n') and astart + alen == len(l1) + 1:
            for idx in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[idx].startswith(('-', ' ')):
                    if hunklines[idx].startswith(' '):
                        skip = True
                    hunklines[idx] += '\n'
                    hunklines.insert(idx + 1, _missing_newline_marker)
                    break
        if not skip and not t2.endswith('\n') and bstart + blen == len(l2) + 1:
            for idx in xrange(len(hunklines) - 1, -1, -1):
                if hunklines[idx].startswith('+'):
                    hunklines[idx] += '\n'
                    hunklines.insert(idx + 1, _missing_newline_marker)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and
    # translates them into diff output.
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content.  When
                # blank lines are ignored, the new lines offsets must be
                # adjusted as if equivalent blocks ('~') had the same sizes
                # on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        joined = False
        if hunk:
            if astart < hunk[1] + opts.context + 1:
                # the change falls inside the previous hunk's context:
                # continue that hunk from where it ended
                joined = True
                astart = hunk[1]
                bstart = hunk[3]
            else:
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if joined:
            # record the new ending points and keep filling the same delta
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # start a new hunk; it shares the delta list filled below
            hunk = [astart, a2, bstart, b2, delta]

        # in-place extension: delta must stay the list stored in hunk[4]
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        yield False
461 452
def b85diff(to, tn):
    '''Return a base85-encoded "GIT binary patch" from to to tn.

    None is treated as empty content.  Returns '' when both sides are
    equal.  The patch carries the zlib-compressed new content as a
    "literal" section.
    '''
    def fmtline(line):
        # the first character encodes the chunk length: 'A'.. for 1-26,
        # 'a'.. for 27 and up
        size = len(line)
        if size <= 26:
            lenchar = pycompat.bytechr(ord('A') + size - 1)
        else:
            lenchar = pycompat.bytechr(size - 26 + ord('a') - 1)
        return '%c%s\n' % (lenchar, util.b85encode(line, True))

    def chunk(text, csize=52):
        total = len(text)
        offset = 0
        while offset < total:
            yield text[offset:offset + csize]
            offset += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    ret = ['GIT binary patch\n', 'literal %d\n' % len(tn)]
    ret.extend(fmtline(piece) for piece in chunk(zlib.compress(tn)))
    ret.append('\n')

    return ''.join(ret)
496 487
def patchtext(bin):
    """Return the concatenated data sections of binary patch `bin`.

    The patch is a sequence of records: a 12-byte header of three
    big-endian 32-bit integers, the last being the data length, followed
    by that many bytes of data.
    """
    pieces = []
    offset = 0
    end = len(bin)
    while offset < end:
        datastart = offset + 12
        p1, p2, length = struct.unpack(">lll", bin[offset:datastart])
        offset = datastart + length
        pieces.append(bin[datastart:offset])
    return "".join(pieces)
506 497
def patch(a, bin):
    """Apply binary patch `bin` to `a` and return the result."""
    if not len(a):
        # nothing to patch: the result is the patch data after the
        # 12-byte trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])
512 503
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff.blocks(a, b) as (a-start, b-start, length) triples."""
    return [(a1, b1, a2 - a1) for a1, a2, b1, b2 in bdiff.blocks(a, b)]
516 507
def trivialdiffheader(length):
    """Return the 12-byte header (0, 0, length), or '' when length is 0."""
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)
519 510
def replacediffheader(oldlen, newlen):
    """Return the 12-byte header (0, oldlen, newlen) as three big-endian
    32-bit integers."""
    start = 0
    return struct.pack(">lll", start, oldlen, newlen)
@@ -1,116 +1,116 b''
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 # Rules for how modules can be loaded. Values are:
14 14 #
15 15 # c - require C extensions
16 16 # allow - allow pure Python implementation when C loading fails
17 17 # cffi - required cffi versions (implemented within pure module)
18 18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 19 # py - only load pure Python modules
20 20 #
21 21 # By default, fall back to the pure modules so the in-place build can
22 22 # run without recompiling the C extensions. This will be overridden by
23 23 # __modulepolicy__ generated by setup.py.
24 24 policy = b'allow'
25 25 _packageprefs = {
26 26 # policy: (versioned package, pure package)
27 27 b'c': (r'cext', None),
28 28 b'allow': (r'cext', r'pure'),
29 29 b'cffi': (r'cffi', None),
30 30 b'cffi-allow': (r'cffi', r'pure'),
31 31 b'py': (None, r'pure'),
32 32 }
33 33
34 34 try:
35 35 from . import __modulepolicy__
36 36 policy = __modulepolicy__.modulepolicy
37 37 except ImportError:
38 38 pass
39 39
40 40 # PyPy doesn't load C extensions.
41 41 #
42 42 # The canonical way to do this is to test platform.python_implementation().
43 43 # But we don't import platform and don't bloat for it here.
44 44 if r'__pypy__' in sys.builtin_module_names:
45 45 policy = b'cffi'
46 46
47 47 # Our C extensions aren't yet compatible with Python 3. So use pure Python
48 48 # on Python 3 for now.
49 49 if sys.version_info[0] >= 3:
50 50 policy = b'py'
51 51
52 52 # Environment variable can always force settings.
53 53 if sys.version_info[0] >= 3:
54 54 if r'HGMODULEPOLICY' in os.environ:
55 55 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
56 56 else:
57 57 policy = os.environ.get(r'HGMODULEPOLICY', policy)
58 58
def _importfrom(pkgname, modname):
    """Equivalent of ``from .<pkgname> import <modname>``, resolved
    relative to this module.

    Raises ImportError if the package has no such attribute.
    """
    fakelocals = {}
    pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
    try:
        mod = getattr(pkg, modname)
    except AttributeError:
        raise ImportError(r'cannot import name %s' % modname)
    fakelocals[modname] = mod
    # force import; fakelocals[modname] may be replaced with the real module
    getattr(mod, r'__doc__', None)
    return fakelocals[modname]
70 70
71 71 # keep in sync with "version" in C modules
72 72 _cextversions = {
73 73 (r'cext', r'base85'): 1,
74 (r'cext', r'bdiff'): 1,
74 (r'cext', r'bdiff'): 2,
75 75 (r'cext', r'diffhelpers'): 1,
76 76 (r'cext', r'mpatch'): 1,
77 77 (r'cext', r'osutil'): 3,
78 78 (r'cext', r'parsers'): 4,
79 79 }
80 80
81 81 # map import request to other package or module
82 82 _modredirects = {
83 83 (r'cext', r'charencode'): (r'cext', r'parsers'),
84 84 (r'cffi', r'base85'): (r'pure', r'base85'),
85 85 (r'cffi', r'charencode'): (r'pure', r'charencode'),
86 86 (r'cffi', r'diffhelpers'): (r'pure', r'diffhelpers'),
87 87 (r'cffi', r'parsers'): (r'pure', r'parsers'),
88 88 }
89 89
def _checkmod(pkgname, modname, mod):
    """Verify that `mod` exposes the API version listed in _cextversions.

    The module's ``version`` attribute (None when absent) must equal the
    entry for (pkgname, modname) in _cextversions (None when unlisted).
    Raises ImportError on mismatch.
    """
    expected = _cextversions.get((pkgname, modname))
    actual = getattr(mod, r'version', None)
    if actual != expected:
        # %r rather than %d: expected is None for an unlisted module, and
        # %d would raise TypeError instead of the intended ImportError
        raise ImportError(r'cannot import module %s.%s '
                          r'(expected version: %r, actual: %r)'
                          % (pkgname, modname, expected, actual))
97 97
def importmod(modname):
    """Import module according to policy and check API version.

    The versioned package named by the policy is tried first and its API
    version is checked.  If that raises ImportError and the policy names a
    pure package, the pure module is imported instead.
    """
    def redirected(pkg):
        # apply _modredirects, defaulting to the request itself
        request = (pkg, modname)
        return _modredirects.get(request, request)

    try:
        verpkg, purepkg = _packageprefs[policy]
    except KeyError:
        raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
    assert verpkg or purepkg

    if verpkg:
        pn, mn = redirected(verpkg)
        try:
            mod = _importfrom(pn, mn)
            # only modules loaded from the versioned package itself are
            # version-checked, not redirected ones
            if pn == verpkg:
                _checkmod(pn, mn, mod)
            return mod
        except ImportError:
            if not purepkg:
                raise

    pn, mn = redirected(purepkg)
    return _importfrom(pn, mn)
@@ -1,92 +1,102 b''
1 1 # bdiff.py - Python implementation of bdiff.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import difflib
11 11 import re
12 12 import struct
13 13
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    pieces = text.split('\n')
    # whatever follows the last newline; empty when text is empty or ends
    # with a newline
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
23 23
24 24 def _normalizeblocks(a, b, blocks):
25 25 prev = None
26 26 r = []
27 27 for curr in blocks:
28 28 if prev is None:
29 29 prev = curr
30 30 continue
31 31 shift = 0
32 32
33 33 a1, b1, l1 = prev
34 34 a1end = a1 + l1
35 35 b1end = b1 + l1
36 36
37 37 a2, b2, l2 = curr
38 38 a2end = a2 + l2
39 39 b2end = b2 + l2
40 40 if a1end == a2:
41 41 while (a1end + shift < a2end and
42 42 a[a1end + shift] == b[b1end + shift]):
43 43 shift += 1
44 44 elif b1end == b2:
45 45 while (b1end + shift < b2end and
46 46 a[a1end + shift] == b[b1end + shift]):
47 47 shift += 1
48 48 r.append((a1, b1, l1 + shift))
49 49 prev = a2 + shift, b2 + shift, l2 - shift
50 50 r.append(prev)
51 51 return r
52 52
def bdiff(a, b):
    """Return a binary patch turning a into b.

    Each record is a 12-byte header (start offset in a, end offset in a,
    length of the new data, as big-endian 32-bit integers) followed by the
    new data.
    """
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        # nothing to match against: one record inserting all of b
        s = "".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    # offsets[i] is the byte offset of line i in a
    offsets = [0]
    for line in a:
        offsets.append(offsets[-1] + len(line))

    matches = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    matches = _normalizeblocks(a, b, matches)

    records = []
    la = lb = 0
    for am, bm, size in matches:
        s = "".join(b[lb:bm])
        # emit a record when lines of a were skipped or b has new data
        if am > la or s:
            header = struct.pack(">lll", offsets[la], offsets[am], len(s))
            records.append(header + s)
        la = am + size
        lb = bm + size

    return "".join(records)
78 78
def blocks(a, b):
    """Return the matching line ranges of a and b as (a1, a2, b1, b2)
    tuples."""
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    return [(astart, astart + length, bstart, bstart + length)
            for (astart, bstart, length) in matches]
85 85
def fixws(text, allws):
    """Normalise runs of spaces, tabs and carriage returns in text.

    With allws true the runs are removed.  Otherwise each run becomes a
    single space and a space directly before a newline is dropped.
    """
    if allws:
        return re.sub('[ \t\r]+', '', text)
    collapsed = re.sub('[ \t\r]+', ' ', text)
    return collapsed.replace(' \n', '\n')
93
# NOTE(review): this module already defines splitnewlines() near its top
# with the same body; this later definition is the one that wins at import
# time.
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = []
    rest = text.split('\n')
    # everything before the final split element ended with a newline
    for piece in rest[:-1]:
        lines.append(piece + '\n')
    # keep the remainder only when there is one (no trailing empty entry)
    if rest[-1]:
        lines.append(rest[-1])
    return lines
General Comments 0
You need to be logged in to leave comments. Login now