##// END OF EJS Templates
bdiff: write a native version of splitnewlines...
Augie Fackler -
r36219:29dd37a4 default
parent child Browse files
Show More
@@ -1,213 +1,263 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stdlib.h>
15 #include <stdlib.h>
16 #include <string.h>
16 #include <string.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "util.h"
20 #include "util.h"
21
21
22 static PyObject *blocks(PyObject *self, PyObject *args)
22 static PyObject *blocks(PyObject *self, PyObject *args)
23 {
23 {
24 PyObject *sa, *sb, *rl = NULL, *m;
24 PyObject *sa, *sb, *rl = NULL, *m;
25 struct bdiff_line *a, *b;
25 struct bdiff_line *a, *b;
26 struct bdiff_hunk l, *h;
26 struct bdiff_hunk l, *h;
27 int an, bn, count, pos = 0;
27 int an, bn, count, pos = 0;
28
28
29 l.next = NULL;
29 l.next = NULL;
30
30
31 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
31 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 return NULL;
32 return NULL;
33
33
34 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
34 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
35 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36
36
37 if (!a || !b)
37 if (!a || !b)
38 goto nomem;
38 goto nomem;
39
39
40 count = bdiff_diff(a, an, b, bn, &l);
40 count = bdiff_diff(a, an, b, bn, &l);
41 if (count < 0)
41 if (count < 0)
42 goto nomem;
42 goto nomem;
43
43
44 rl = PyList_New(count);
44 rl = PyList_New(count);
45 if (!rl)
45 if (!rl)
46 goto nomem;
46 goto nomem;
47
47
48 for (h = l.next; h; h = h->next) {
48 for (h = l.next; h; h = h->next) {
49 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
49 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 PyList_SetItem(rl, pos, m);
50 PyList_SetItem(rl, pos, m);
51 pos++;
51 pos++;
52 }
52 }
53
53
54 nomem:
54 nomem:
55 free(a);
55 free(a);
56 free(b);
56 free(b);
57 bdiff_freehunks(l.next);
57 bdiff_freehunks(l.next);
58 return rl ? rl : PyErr_NoMemory();
58 return rl ? rl : PyErr_NoMemory();
59 }
59 }
60
60
61 static PyObject *bdiff(PyObject *self, PyObject *args)
61 static PyObject *bdiff(PyObject *self, PyObject *args)
62 {
62 {
63 char *sa, *sb, *rb, *ia, *ib;
63 char *sa, *sb, *rb, *ia, *ib;
64 PyObject *result = NULL;
64 PyObject *result = NULL;
65 struct bdiff_line *al, *bl;
65 struct bdiff_line *al, *bl;
66 struct bdiff_hunk l, *h;
66 struct bdiff_hunk l, *h;
67 int an, bn, count;
67 int an, bn, count;
68 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
68 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
69 PyThreadState *_save;
69 PyThreadState *_save;
70
70
71 l.next = NULL;
71 l.next = NULL;
72
72
73 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
73 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
74 return NULL;
74 return NULL;
75
75
76 if (la > UINT_MAX || lb > UINT_MAX) {
76 if (la > UINT_MAX || lb > UINT_MAX) {
77 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
77 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
78 return NULL;
78 return NULL;
79 }
79 }
80
80
81 _save = PyEval_SaveThread();
81 _save = PyEval_SaveThread();
82
82
83 lmax = la > lb ? lb : la;
83 lmax = la > lb ? lb : la;
84 for (ia = sa, ib = sb; li < lmax && *ia == *ib; ++li, ++ia, ++ib)
84 for (ia = sa, ib = sb; li < lmax && *ia == *ib; ++li, ++ia, ++ib)
85 if (*ia == '\n')
85 if (*ia == '\n')
86 lcommon = li + 1;
86 lcommon = li + 1;
87 /* we can almost add: if (li == lmax) lcommon = li; */
87 /* we can almost add: if (li == lmax) lcommon = li; */
88
88
89 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
89 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
90 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
90 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
91 if (!al || !bl)
91 if (!al || !bl)
92 goto nomem;
92 goto nomem;
93
93
94 count = bdiff_diff(al, an, bl, bn, &l);
94 count = bdiff_diff(al, an, bl, bn, &l);
95 if (count < 0)
95 if (count < 0)
96 goto nomem;
96 goto nomem;
97
97
98 /* calculate length of output */
98 /* calculate length of output */
99 la = lb = 0;
99 la = lb = 0;
100 for (h = l.next; h; h = h->next) {
100 for (h = l.next; h; h = h->next) {
101 if (h->a1 != la || h->b1 != lb)
101 if (h->a1 != la || h->b1 != lb)
102 len += 12 + bl[h->b1].l - bl[lb].l;
102 len += 12 + bl[h->b1].l - bl[lb].l;
103 la = h->a2;
103 la = h->a2;
104 lb = h->b2;
104 lb = h->b2;
105 }
105 }
106 PyEval_RestoreThread(_save);
106 PyEval_RestoreThread(_save);
107 _save = NULL;
107 _save = NULL;
108
108
109 result = PyBytes_FromStringAndSize(NULL, len);
109 result = PyBytes_FromStringAndSize(NULL, len);
110
110
111 if (!result)
111 if (!result)
112 goto nomem;
112 goto nomem;
113
113
114 /* build binary patch */
114 /* build binary patch */
115 rb = PyBytes_AsString(result);
115 rb = PyBytes_AsString(result);
116 la = lb = 0;
116 la = lb = 0;
117
117
118 for (h = l.next; h; h = h->next) {
118 for (h = l.next; h; h = h->next) {
119 if (h->a1 != la || h->b1 != lb) {
119 if (h->a1 != la || h->b1 != lb) {
120 len = bl[h->b1].l - bl[lb].l;
120 len = bl[h->b1].l - bl[lb].l;
121 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
121 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
122 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
122 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
123 rb + 4);
123 rb + 4);
124 putbe32((uint32_t)len, rb + 8);
124 putbe32((uint32_t)len, rb + 8);
125 memcpy(rb + 12, bl[lb].l, len);
125 memcpy(rb + 12, bl[lb].l, len);
126 rb += 12 + len;
126 rb += 12 + len;
127 }
127 }
128 la = h->a2;
128 la = h->a2;
129 lb = h->b2;
129 lb = h->b2;
130 }
130 }
131
131
132 nomem:
132 nomem:
133 if (_save)
133 if (_save)
134 PyEval_RestoreThread(_save);
134 PyEval_RestoreThread(_save);
135 free(al);
135 free(al);
136 free(bl);
136 free(bl);
137 bdiff_freehunks(l.next);
137 bdiff_freehunks(l.next);
138 return result ? result : PyErr_NoMemory();
138 return result ? result : PyErr_NoMemory();
139 }
139 }
140
140
141 /*
141 /*
142 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
142 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
143 * reduce whitespace sequences to a single space and trim remaining whitespace
143 * reduce whitespace sequences to a single space and trim remaining whitespace
144 * from end of lines.
144 * from end of lines.
145 */
145 */
146 static PyObject *fixws(PyObject *self, PyObject *args)
146 static PyObject *fixws(PyObject *self, PyObject *args)
147 {
147 {
148 PyObject *s, *result = NULL;
148 PyObject *s, *result = NULL;
149 char allws, c;
149 char allws, c;
150 const char *r;
150 const char *r;
151 Py_ssize_t i, rlen, wlen = 0;
151 Py_ssize_t i, rlen, wlen = 0;
152 char *w;
152 char *w;
153
153
154 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
154 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
155 return NULL;
155 return NULL;
156 r = PyBytes_AsString(s);
156 r = PyBytes_AsString(s);
157 rlen = PyBytes_Size(s);
157 rlen = PyBytes_Size(s);
158
158
159 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
159 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
160 if (!w)
160 if (!w)
161 goto nomem;
161 goto nomem;
162
162
163 for (i = 0; i != rlen; i++) {
163 for (i = 0; i != rlen; i++) {
164 c = r[i];
164 c = r[i];
165 if (c == ' ' || c == '\t' || c == '\r') {
165 if (c == ' ' || c == '\t' || c == '\r') {
166 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
166 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
167 w[wlen++] = ' ';
167 w[wlen++] = ' ';
168 } else if (c == '\n' && !allws && wlen > 0 &&
168 } else if (c == '\n' && !allws && wlen > 0 &&
169 w[wlen - 1] == ' ') {
169 w[wlen - 1] == ' ') {
170 w[wlen - 1] = '\n';
170 w[wlen - 1] = '\n';
171 } else {
171 } else {
172 w[wlen++] = c;
172 w[wlen++] = c;
173 }
173 }
174 }
174 }
175
175
176 result = PyBytes_FromStringAndSize(w, wlen);
176 result = PyBytes_FromStringAndSize(w, wlen);
177
177
178 nomem:
178 nomem:
179 PyMem_Free(w);
179 PyMem_Free(w);
180 return result ? result : PyErr_NoMemory();
180 return result ? result : PyErr_NoMemory();
181 }
181 }
182
182
183 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
184 const char *source, Py_ssize_t len)
185 {
186 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
187 if (sliced == NULL)
188 return false;
189 PyList_SET_ITEM(list, destidx, sliced);
190 return true;
191 }
192
193 static PyObject *splitnewlines(PyObject *self, PyObject *args)
194 {
195 const char *text;
196 Py_ssize_t nelts = 0, size, i, start = 0;
197 PyObject *result = NULL;
198
199 if (!PyArg_ParseTuple(args, "s#", &text, &size)) {
200 goto abort;
201 }
202 if (!size) {
203 return PyList_New(0);
204 }
205 /* This loops to size-1 because if the last byte is a newline,
206 * we don't want to perform a split there. */
207 for (i = 0; i < size - 1; ++i) {
208 if (text[i] == '\n') {
209 ++nelts;
210 }
211 }
212 if ((result = PyList_New(nelts + 1)) == NULL)
213 goto abort;
214 nelts = 0;
215 for (i = 0; i < size - 1; ++i) {
216 if (text[i] == '\n') {
217 if (!sliceintolist(result, nelts++, text + start,
218 i - start + 1))
219 goto abort;
220 start = i + 1;
221 }
222 }
223 if (!sliceintolist(result, nelts++, text + start, size - start))
224 goto abort;
225 return result;
226 abort:
227 Py_XDECREF(result);
228 return NULL;
229 }
230
183 static char mdiff_doc[] = "Efficient binary diff.";
231 static char mdiff_doc[] = "Efficient binary diff.";
184
232
185 static PyMethodDef methods[] = {
233 static PyMethodDef methods[] = {
186 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
234 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
187 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
235 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
188 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
236 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
237 {"splitnewlines", splitnewlines, METH_VARARGS,
238 "like str.splitlines, but only split on newlines\n"},
189 {NULL, NULL},
239 {NULL, NULL},
190 };
240 };
191
241
192 static const int version = 1;
242 static const int version = 2;
193
243
194 #ifdef IS_PY3K
244 #ifdef IS_PY3K
195 static struct PyModuleDef bdiff_module = {
245 static struct PyModuleDef bdiff_module = {
196 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
246 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
197 };
247 };
198
248
199 PyMODINIT_FUNC PyInit_bdiff(void)
249 PyMODINIT_FUNC PyInit_bdiff(void)
200 {
250 {
201 PyObject *m;
251 PyObject *m;
202 m = PyModule_Create(&bdiff_module);
252 m = PyModule_Create(&bdiff_module);
203 PyModule_AddIntConstant(m, "version", version);
253 PyModule_AddIntConstant(m, "version", version);
204 return m;
254 return m;
205 }
255 }
206 #else
256 #else
207 PyMODINIT_FUNC initbdiff(void)
257 PyMODINIT_FUNC initbdiff(void)
208 {
258 {
209 PyObject *m;
259 PyObject *m;
210 m = Py_InitModule3("bdiff", methods, mdiff_doc);
260 m = Py_InitModule3("bdiff", methods, mdiff_doc);
211 PyModule_AddIntConstant(m, "version", version);
261 PyModule_AddIntConstant(m, "version", version);
212 }
262 }
213 #endif
263 #endif
@@ -1,521 +1,512 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
22 _missing_newline_marker = "\\ No newline at end of file\n"
22 _missing_newline_marker = "\\ No newline at end of file\n"
23
23
24 bdiff = policy.importmod(r'bdiff')
24 bdiff = policy.importmod(r'bdiff')
25 mpatch = policy.importmod(r'mpatch')
25 mpatch = policy.importmod(r'mpatch')
26
26
27 blocks = bdiff.blocks
27 blocks = bdiff.blocks
28 fixws = bdiff.fixws
28 fixws = bdiff.fixws
29 patches = mpatch.patches
29 patches = mpatch.patches
30 patchedsize = mpatch.patchedsize
30 patchedsize = mpatch.patchedsize
31 textdiff = bdiff.bdiff
31 textdiff = bdiff.bdiff
32
32 splitnewlines = bdiff.splitnewlines
33 def splitnewlines(text):
34 '''like str.splitlines, but only split on newlines.'''
35 lines = [l + '\n' for l in text.split('\n')]
36 if lines:
37 if lines[-1] == '\n':
38 lines.pop()
39 else:
40 lines[-1] = lines[-1][:-1]
41 return lines
42
33
43 class diffopts(object):
34 class diffopts(object):
44 '''context is the number of context lines
35 '''context is the number of context lines
45 text treats all files as text
36 text treats all files as text
46 showfunc enables diff -p output
37 showfunc enables diff -p output
47 git enables the git extended patch format
38 git enables the git extended patch format
48 nodates removes dates from diff headers
39 nodates removes dates from diff headers
49 nobinary ignores binary files
40 nobinary ignores binary files
50 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
41 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
51 ignorews ignores all whitespace changes in the diff
42 ignorews ignores all whitespace changes in the diff
52 ignorewsamount ignores changes in the amount of whitespace
43 ignorewsamount ignores changes in the amount of whitespace
53 ignoreblanklines ignores changes whose lines are all blank
44 ignoreblanklines ignores changes whose lines are all blank
54 upgrade generates git diffs to avoid data loss
45 upgrade generates git diffs to avoid data loss
55 '''
46 '''
56
47
57 defaults = {
48 defaults = {
58 'context': 3,
49 'context': 3,
59 'text': False,
50 'text': False,
60 'showfunc': False,
51 'showfunc': False,
61 'git': False,
52 'git': False,
62 'nodates': False,
53 'nodates': False,
63 'nobinary': False,
54 'nobinary': False,
64 'noprefix': False,
55 'noprefix': False,
65 'index': 0,
56 'index': 0,
66 'ignorews': False,
57 'ignorews': False,
67 'ignorewsamount': False,
58 'ignorewsamount': False,
68 'ignorewseol': False,
59 'ignorewseol': False,
69 'ignoreblanklines': False,
60 'ignoreblanklines': False,
70 'upgrade': False,
61 'upgrade': False,
71 'showsimilarity': False,
62 'showsimilarity': False,
72 'worddiff': False,
63 'worddiff': False,
73 }
64 }
74
65
75 def __init__(self, **opts):
66 def __init__(self, **opts):
76 opts = pycompat.byteskwargs(opts)
67 opts = pycompat.byteskwargs(opts)
77 for k in self.defaults.keys():
68 for k in self.defaults.keys():
78 v = opts.get(k)
69 v = opts.get(k)
79 if v is None:
70 if v is None:
80 v = self.defaults[k]
71 v = self.defaults[k]
81 setattr(self, k, v)
72 setattr(self, k, v)
82
73
83 try:
74 try:
84 self.context = int(self.context)
75 self.context = int(self.context)
85 except ValueError:
76 except ValueError:
86 raise error.Abort(_('diff context lines count must be '
77 raise error.Abort(_('diff context lines count must be '
87 'an integer, not %r') % self.context)
78 'an integer, not %r') % self.context)
88
79
89 def copy(self, **kwargs):
80 def copy(self, **kwargs):
90 opts = dict((k, getattr(self, k)) for k in self.defaults)
81 opts = dict((k, getattr(self, k)) for k in self.defaults)
91 opts = pycompat.strkwargs(opts)
82 opts = pycompat.strkwargs(opts)
92 opts.update(kwargs)
83 opts.update(kwargs)
93 return diffopts(**opts)
84 return diffopts(**opts)
94
85
95 defaultopts = diffopts()
86 defaultopts = diffopts()
96
87
97 def wsclean(opts, text, blank=True):
88 def wsclean(opts, text, blank=True):
98 if opts.ignorews:
89 if opts.ignorews:
99 text = bdiff.fixws(text, 1)
90 text = bdiff.fixws(text, 1)
100 elif opts.ignorewsamount:
91 elif opts.ignorewsamount:
101 text = bdiff.fixws(text, 0)
92 text = bdiff.fixws(text, 0)
102 if blank and opts.ignoreblanklines:
93 if blank and opts.ignoreblanklines:
103 text = re.sub('\n+', '\n', text).strip('\n')
94 text = re.sub('\n+', '\n', text).strip('\n')
104 if opts.ignorewseol:
95 if opts.ignorewseol:
105 text = re.sub(br'[ \t\r\f]+\n', r'\n', text)
96 text = re.sub(br'[ \t\r\f]+\n', r'\n', text)
106 return text
97 return text
107
98
108 def splitblock(base1, lines1, base2, lines2, opts):
99 def splitblock(base1, lines1, base2, lines2, opts):
109 # The input lines matches except for interwoven blank lines. We
100 # The input lines matches except for interwoven blank lines. We
110 # transform it into a sequence of matching blocks and blank blocks.
101 # transform it into a sequence of matching blocks and blank blocks.
111 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
102 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
112 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
103 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
113 s1, e1 = 0, len(lines1)
104 s1, e1 = 0, len(lines1)
114 s2, e2 = 0, len(lines2)
105 s2, e2 = 0, len(lines2)
115 while s1 < e1 or s2 < e2:
106 while s1 < e1 or s2 < e2:
116 i1, i2, btype = s1, s2, '='
107 i1, i2, btype = s1, s2, '='
117 if (i1 >= e1 or lines1[i1] == 0
108 if (i1 >= e1 or lines1[i1] == 0
118 or i2 >= e2 or lines2[i2] == 0):
109 or i2 >= e2 or lines2[i2] == 0):
119 # Consume the block of blank lines
110 # Consume the block of blank lines
120 btype = '~'
111 btype = '~'
121 while i1 < e1 and lines1[i1] == 0:
112 while i1 < e1 and lines1[i1] == 0:
122 i1 += 1
113 i1 += 1
123 while i2 < e2 and lines2[i2] == 0:
114 while i2 < e2 and lines2[i2] == 0:
124 i2 += 1
115 i2 += 1
125 else:
116 else:
126 # Consume the matching lines
117 # Consume the matching lines
127 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
118 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
128 i1 += 1
119 i1 += 1
129 i2 += 1
120 i2 += 1
130 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
121 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
131 s1 = i1
122 s1 = i1
132 s2 = i2
123 s2 = i2
133
124
134 def hunkinrange(hunk, linerange):
125 def hunkinrange(hunk, linerange):
135 """Return True if `hunk` defined as (start, length) is in `linerange`
126 """Return True if `hunk` defined as (start, length) is in `linerange`
136 defined as (lowerbound, upperbound).
127 defined as (lowerbound, upperbound).
137
128
138 >>> hunkinrange((5, 10), (2, 7))
129 >>> hunkinrange((5, 10), (2, 7))
139 True
130 True
140 >>> hunkinrange((5, 10), (6, 12))
131 >>> hunkinrange((5, 10), (6, 12))
141 True
132 True
142 >>> hunkinrange((5, 10), (13, 17))
133 >>> hunkinrange((5, 10), (13, 17))
143 True
134 True
144 >>> hunkinrange((5, 10), (3, 17))
135 >>> hunkinrange((5, 10), (3, 17))
145 True
136 True
146 >>> hunkinrange((5, 10), (1, 3))
137 >>> hunkinrange((5, 10), (1, 3))
147 False
138 False
148 >>> hunkinrange((5, 10), (18, 20))
139 >>> hunkinrange((5, 10), (18, 20))
149 False
140 False
150 >>> hunkinrange((5, 10), (1, 5))
141 >>> hunkinrange((5, 10), (1, 5))
151 False
142 False
152 >>> hunkinrange((5, 10), (15, 27))
143 >>> hunkinrange((5, 10), (15, 27))
153 False
144 False
154 """
145 """
155 start, length = hunk
146 start, length = hunk
156 lowerbound, upperbound = linerange
147 lowerbound, upperbound = linerange
157 return lowerbound < start + length and start < upperbound
148 return lowerbound < start + length and start < upperbound
158
149
159 def blocksinrange(blocks, rangeb):
150 def blocksinrange(blocks, rangeb):
160 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
151 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
161 `rangeb` from ``(b1, b2)`` point of view.
152 `rangeb` from ``(b1, b2)`` point of view.
162
153
163 Return `filteredblocks, rangea` where:
154 Return `filteredblocks, rangea` where:
164
155
165 * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
156 * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
166 `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
157 `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
167 block ``(b1, b2)`` being inside `rangeb` if
158 block ``(b1, b2)`` being inside `rangeb` if
168 ``rangeb[0] < b2 and b1 < rangeb[1]``;
159 ``rangeb[0] < b2 and b1 < rangeb[1]``;
169 * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.
160 * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.
170 """
161 """
171 lbb, ubb = rangeb
162 lbb, ubb = rangeb
172 lba, uba = None, None
163 lba, uba = None, None
173 filteredblocks = []
164 filteredblocks = []
174 for block in blocks:
165 for block in blocks:
175 (a1, a2, b1, b2), stype = block
166 (a1, a2, b1, b2), stype = block
176 if lbb >= b1 and ubb <= b2 and stype == '=':
167 if lbb >= b1 and ubb <= b2 and stype == '=':
177 # rangeb is within a single "=" hunk, restrict back linerange1
168 # rangeb is within a single "=" hunk, restrict back linerange1
178 # by offsetting rangeb
169 # by offsetting rangeb
179 lba = lbb - b1 + a1
170 lba = lbb - b1 + a1
180 uba = ubb - b1 + a1
171 uba = ubb - b1 + a1
181 else:
172 else:
182 if b1 <= lbb < b2:
173 if b1 <= lbb < b2:
183 if stype == '=':
174 if stype == '=':
184 lba = a2 - (b2 - lbb)
175 lba = a2 - (b2 - lbb)
185 else:
176 else:
186 lba = a1
177 lba = a1
187 if b1 < ubb <= b2:
178 if b1 < ubb <= b2:
188 if stype == '=':
179 if stype == '=':
189 uba = a1 + (ubb - b1)
180 uba = a1 + (ubb - b1)
190 else:
181 else:
191 uba = a2
182 uba = a2
192 if hunkinrange((b1, (b2 - b1)), rangeb):
183 if hunkinrange((b1, (b2 - b1)), rangeb):
193 filteredblocks.append(block)
184 filteredblocks.append(block)
194 if lba is None or uba is None or uba < lba:
185 if lba is None or uba is None or uba < lba:
195 raise error.Abort(_('line range exceeds file size'))
186 raise error.Abort(_('line range exceeds file size'))
196 return filteredblocks, (lba, uba)
187 return filteredblocks, (lba, uba)
197
188
198 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
189 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
199 """Return (block, type) tuples, where block is an mdiff.blocks
190 """Return (block, type) tuples, where block is an mdiff.blocks
200 line entry. type is '=' for blocks matching exactly one another
191 line entry. type is '=' for blocks matching exactly one another
201 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
192 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
202 matching only after having filtered blank lines.
193 matching only after having filtered blank lines.
203 line1 and line2 are text1 and text2 split with splitnewlines() if
194 line1 and line2 are text1 and text2 split with splitnewlines() if
204 they are already available.
195 they are already available.
205 """
196 """
206 if opts is None:
197 if opts is None:
207 opts = defaultopts
198 opts = defaultopts
208 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
199 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
209 text1 = wsclean(opts, text1, False)
200 text1 = wsclean(opts, text1, False)
210 text2 = wsclean(opts, text2, False)
201 text2 = wsclean(opts, text2, False)
211 diff = bdiff.blocks(text1, text2)
202 diff = bdiff.blocks(text1, text2)
212 for i, s1 in enumerate(diff):
203 for i, s1 in enumerate(diff):
213 # The first match is special.
204 # The first match is special.
214 # we've either found a match starting at line 0 or a match later
205 # we've either found a match starting at line 0 or a match later
215 # in the file. If it starts later, old and new below will both be
206 # in the file. If it starts later, old and new below will both be
216 # empty and we'll continue to the next match.
207 # empty and we'll continue to the next match.
217 if i > 0:
208 if i > 0:
218 s = diff[i - 1]
209 s = diff[i - 1]
219 else:
210 else:
220 s = [0, 0, 0, 0]
211 s = [0, 0, 0, 0]
221 s = [s[1], s1[0], s[3], s1[2]]
212 s = [s[1], s1[0], s[3], s1[2]]
222
213
223 # bdiff sometimes gives huge matches past eof, this check eats them,
214 # bdiff sometimes gives huge matches past eof, this check eats them,
224 # and deals with the special first match case described above
215 # and deals with the special first match case described above
225 if s[0] != s[1] or s[2] != s[3]:
216 if s[0] != s[1] or s[2] != s[3]:
226 type = '!'
217 type = '!'
227 if opts.ignoreblanklines:
218 if opts.ignoreblanklines:
228 if lines1 is None:
219 if lines1 is None:
229 lines1 = splitnewlines(text1)
220 lines1 = splitnewlines(text1)
230 if lines2 is None:
221 if lines2 is None:
231 lines2 = splitnewlines(text2)
222 lines2 = splitnewlines(text2)
232 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
223 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
233 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
224 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
234 if old == new:
225 if old == new:
235 type = '~'
226 type = '~'
236 yield s, type
227 yield s, type
237 yield s1, '='
228 yield s1, '='
238
229
239 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
230 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
240 """Return a unified diff as a (headers, hunks) tuple.
231 """Return a unified diff as a (headers, hunks) tuple.
241
232
242 If the diff is not null, `headers` is a list with unified diff header
233 If the diff is not null, `headers` is a list with unified diff header
243 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
234 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
244 (hunkrange, hunklines) coming from _unidiff().
235 (hunkrange, hunklines) coming from _unidiff().
245 Otherwise, `headers` and `hunks` are empty.
236 Otherwise, `headers` and `hunks` are empty.
246
237
247 Set binary=True if either a or b should be taken as a binary file.
238 Set binary=True if either a or b should be taken as a binary file.
248 """
239 """
249 def datetag(date, fn=None):
240 def datetag(date, fn=None):
250 if not opts.git and not opts.nodates:
241 if not opts.git and not opts.nodates:
251 return '\t%s' % date
242 return '\t%s' % date
252 if fn and ' ' in fn:
243 if fn and ' ' in fn:
253 return '\t'
244 return '\t'
254 return ''
245 return ''
255
246
256 sentinel = [], ()
247 sentinel = [], ()
257 if not a and not b:
248 if not a and not b:
258 return sentinel
249 return sentinel
259
250
260 if opts.noprefix:
251 if opts.noprefix:
261 aprefix = bprefix = ''
252 aprefix = bprefix = ''
262 else:
253 else:
263 aprefix = 'a/'
254 aprefix = 'a/'
264 bprefix = 'b/'
255 bprefix = 'b/'
265
256
266 epoch = util.datestr((0, 0))
257 epoch = util.datestr((0, 0))
267
258
268 fn1 = util.pconvert(fn1)
259 fn1 = util.pconvert(fn1)
269 fn2 = util.pconvert(fn2)
260 fn2 = util.pconvert(fn2)
270
261
271 if binary:
262 if binary:
272 if a and b and len(a) == len(b) and a == b:
263 if a and b and len(a) == len(b) and a == b:
273 return sentinel
264 return sentinel
274 headerlines = []
265 headerlines = []
275 hunks = (None, ['Binary file %s has changed\n' % fn1]),
266 hunks = (None, ['Binary file %s has changed\n' % fn1]),
276 elif not a:
267 elif not a:
277 without_newline = not b.endswith('\n')
268 without_newline = not b.endswith('\n')
278 b = splitnewlines(b)
269 b = splitnewlines(b)
279 if a is None:
270 if a is None:
280 l1 = '--- /dev/null%s' % datetag(epoch)
271 l1 = '--- /dev/null%s' % datetag(epoch)
281 else:
272 else:
282 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
273 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
283 l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
274 l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
284 headerlines = [l1, l2]
275 headerlines = [l1, l2]
285 size = len(b)
276 size = len(b)
286 hunkrange = (0, 0, 1, size)
277 hunkrange = (0, 0, 1, size)
287 hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
278 hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
288 if without_newline:
279 if without_newline:
289 hunklines[-1] += '\n'
280 hunklines[-1] += '\n'
290 hunklines.append(_missing_newline_marker)
281 hunklines.append(_missing_newline_marker)
291 hunks = (hunkrange, hunklines),
282 hunks = (hunkrange, hunklines),
292 elif not b:
283 elif not b:
293 without_newline = not a.endswith('\n')
284 without_newline = not a.endswith('\n')
294 a = splitnewlines(a)
285 a = splitnewlines(a)
295 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
286 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
296 if b is None:
287 if b is None:
297 l2 = '+++ /dev/null%s' % datetag(epoch)
288 l2 = '+++ /dev/null%s' % datetag(epoch)
298 else:
289 else:
299 l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
290 l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
300 headerlines = [l1, l2]
291 headerlines = [l1, l2]
301 size = len(a)
292 size = len(a)
302 hunkrange = (1, size, 0, 0)
293 hunkrange = (1, size, 0, 0)
303 hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
294 hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
304 if without_newline:
295 if without_newline:
305 hunklines[-1] += '\n'
296 hunklines[-1] += '\n'
306 hunklines.append(_missing_newline_marker)
297 hunklines.append(_missing_newline_marker)
307 hunks = (hunkrange, hunklines),
298 hunks = (hunkrange, hunklines),
308 else:
299 else:
309 hunks = _unidiff(a, b, opts=opts)
300 hunks = _unidiff(a, b, opts=opts)
310 if not next(hunks):
301 if not next(hunks):
311 return sentinel
302 return sentinel
312
303
313 headerlines = [
304 headerlines = [
314 "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
305 "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
315 "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
306 "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
316 ]
307 ]
317
308
318 return headerlines, hunks
309 return headerlines, hunks
319
310
320 def _unidiff(t1, t2, opts=defaultopts):
311 def _unidiff(t1, t2, opts=defaultopts):
321 """Yield hunks of a headerless unified diff from t1 and t2 texts.
312 """Yield hunks of a headerless unified diff from t1 and t2 texts.
322
313
323 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
314 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
324 tuple (s1, l1, s2, l2) representing the range information of the hunk to
315 tuple (s1, l1, s2, l2) representing the range information of the hunk to
325 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
316 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
326 of the hunk combining said header followed by line additions and
317 of the hunk combining said header followed by line additions and
327 deletions.
318 deletions.
328
319
329 The hunks are prefixed with a bool.
320 The hunks are prefixed with a bool.
330 """
321 """
331 l1 = splitnewlines(t1)
322 l1 = splitnewlines(t1)
332 l2 = splitnewlines(t2)
323 l2 = splitnewlines(t2)
333 def contextend(l, len):
324 def contextend(l, len):
334 ret = l + opts.context
325 ret = l + opts.context
335 if ret > len:
326 if ret > len:
336 ret = len
327 ret = len
337 return ret
328 return ret
338
329
339 def contextstart(l):
330 def contextstart(l):
340 ret = l - opts.context
331 ret = l - opts.context
341 if ret < 0:
332 if ret < 0:
342 return 0
333 return 0
343 return ret
334 return ret
344
335
345 lastfunc = [0, '']
336 lastfunc = [0, '']
346 def yieldhunk(hunk):
337 def yieldhunk(hunk):
347 (astart, a2, bstart, b2, delta) = hunk
338 (astart, a2, bstart, b2, delta) = hunk
348 aend = contextend(a2, len(l1))
339 aend = contextend(a2, len(l1))
349 alen = aend - astart
340 alen = aend - astart
350 blen = b2 - bstart + aend - a2
341 blen = b2 - bstart + aend - a2
351
342
352 func = ""
343 func = ""
353 if opts.showfunc:
344 if opts.showfunc:
354 lastpos, func = lastfunc
345 lastpos, func = lastfunc
355 # walk backwards from the start of the context up to the start of
346 # walk backwards from the start of the context up to the start of
356 # the previous hunk context until we find a line starting with an
347 # the previous hunk context until we find a line starting with an
357 # alphanumeric char.
348 # alphanumeric char.
358 for i in xrange(astart - 1, lastpos - 1, -1):
349 for i in xrange(astart - 1, lastpos - 1, -1):
359 if l1[i][0:1].isalnum():
350 if l1[i][0:1].isalnum():
360 func = ' ' + l1[i].rstrip()[:40]
351 func = ' ' + l1[i].rstrip()[:40]
361 lastfunc[1] = func
352 lastfunc[1] = func
362 break
353 break
363 # by recording this hunk's starting point as the next place to
354 # by recording this hunk's starting point as the next place to
364 # start looking for function lines, we avoid reading any line in
355 # start looking for function lines, we avoid reading any line in
365 # the file more than once.
356 # the file more than once.
366 lastfunc[0] = astart
357 lastfunc[0] = astart
367
358
368 # zero-length hunk ranges report their start line as one less
359 # zero-length hunk ranges report their start line as one less
369 if alen:
360 if alen:
370 astart += 1
361 astart += 1
371 if blen:
362 if blen:
372 bstart += 1
363 bstart += 1
373
364
374 hunkrange = astart, alen, bstart, blen
365 hunkrange = astart, alen, bstart, blen
375 hunklines = (
366 hunklines = (
376 ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
367 ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
377 + delta
368 + delta
378 + [' ' + l1[x] for x in xrange(a2, aend)]
369 + [' ' + l1[x] for x in xrange(a2, aend)]
379 )
370 )
380 # If either file ends without a newline and the last line of
371 # If either file ends without a newline and the last line of
381 # that file is part of a hunk, a marker is printed. If the
372 # that file is part of a hunk, a marker is printed. If the
382 # last line of both files is identical and neither ends in
373 # last line of both files is identical and neither ends in
383 # a newline, print only one marker. That's the only case in
374 # a newline, print only one marker. That's the only case in
384 # which the hunk can end in a shared line without a newline.
375 # which the hunk can end in a shared line without a newline.
385 skip = False
376 skip = False
386 if not t1.endswith('\n') and astart + alen == len(l1) + 1:
377 if not t1.endswith('\n') and astart + alen == len(l1) + 1:
387 for i in xrange(len(hunklines) - 1, -1, -1):
378 for i in xrange(len(hunklines) - 1, -1, -1):
388 if hunklines[i].startswith(('-', ' ')):
379 if hunklines[i].startswith(('-', ' ')):
389 if hunklines[i].startswith(' '):
380 if hunklines[i].startswith(' '):
390 skip = True
381 skip = True
391 hunklines[i] += '\n'
382 hunklines[i] += '\n'
392 hunklines.insert(i + 1, _missing_newline_marker)
383 hunklines.insert(i + 1, _missing_newline_marker)
393 break
384 break
394 if not skip and not t2.endswith('\n') and bstart + blen == len(l2) + 1:
385 if not skip and not t2.endswith('\n') and bstart + blen == len(l2) + 1:
395 for i in xrange(len(hunklines) - 1, -1, -1):
386 for i in xrange(len(hunklines) - 1, -1, -1):
396 if hunklines[i].startswith('+'):
387 if hunklines[i].startswith('+'):
397 hunklines[i] += '\n'
388 hunklines[i] += '\n'
398 hunklines.insert(i + 1, _missing_newline_marker)
389 hunklines.insert(i + 1, _missing_newline_marker)
399 break
390 break
400 yield hunkrange, hunklines
391 yield hunkrange, hunklines
401
392
402 # bdiff.blocks gives us the matching sequences in the files. The loop
393 # bdiff.blocks gives us the matching sequences in the files. The loop
403 # below finds the spaces between those matching sequences and translates
394 # below finds the spaces between those matching sequences and translates
404 # them into diff output.
395 # them into diff output.
405 #
396 #
406 hunk = None
397 hunk = None
407 ignoredlines = 0
398 ignoredlines = 0
408 has_hunks = False
399 has_hunks = False
409 for s, stype in allblocks(t1, t2, opts, l1, l2):
400 for s, stype in allblocks(t1, t2, opts, l1, l2):
410 a1, a2, b1, b2 = s
401 a1, a2, b1, b2 = s
411 if stype != '!':
402 if stype != '!':
412 if stype == '~':
403 if stype == '~':
413 # The diff context lines are based on t1 content. When
404 # The diff context lines are based on t1 content. When
414 # blank lines are ignored, the new lines offsets must
405 # blank lines are ignored, the new lines offsets must
415 # be adjusted as if equivalent blocks ('~') had the
406 # be adjusted as if equivalent blocks ('~') had the
416 # same sizes on both sides.
407 # same sizes on both sides.
417 ignoredlines += (b2 - b1) - (a2 - a1)
408 ignoredlines += (b2 - b1) - (a2 - a1)
418 continue
409 continue
419 delta = []
410 delta = []
420 old = l1[a1:a2]
411 old = l1[a1:a2]
421 new = l2[b1:b2]
412 new = l2[b1:b2]
422
413
423 b1 -= ignoredlines
414 b1 -= ignoredlines
424 b2 -= ignoredlines
415 b2 -= ignoredlines
425 astart = contextstart(a1)
416 astart = contextstart(a1)
426 bstart = contextstart(b1)
417 bstart = contextstart(b1)
427 prev = None
418 prev = None
428 if hunk:
419 if hunk:
429 # join with the previous hunk if it falls inside the context
420 # join with the previous hunk if it falls inside the context
430 if astart < hunk[1] + opts.context + 1:
421 if astart < hunk[1] + opts.context + 1:
431 prev = hunk
422 prev = hunk
432 astart = hunk[1]
423 astart = hunk[1]
433 bstart = hunk[3]
424 bstart = hunk[3]
434 else:
425 else:
435 if not has_hunks:
426 if not has_hunks:
436 has_hunks = True
427 has_hunks = True
437 yield True
428 yield True
438 for x in yieldhunk(hunk):
429 for x in yieldhunk(hunk):
439 yield x
430 yield x
440 if prev:
431 if prev:
441 # we've joined the previous hunk, record the new ending points.
432 # we've joined the previous hunk, record the new ending points.
442 hunk[1] = a2
433 hunk[1] = a2
443 hunk[3] = b2
434 hunk[3] = b2
444 delta = hunk[4]
435 delta = hunk[4]
445 else:
436 else:
446 # create a new hunk
437 # create a new hunk
447 hunk = [astart, a2, bstart, b2, delta]
438 hunk = [astart, a2, bstart, b2, delta]
448
439
449 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
440 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
450 delta[len(delta):] = ['-' + x for x in old]
441 delta[len(delta):] = ['-' + x for x in old]
451 delta[len(delta):] = ['+' + x for x in new]
442 delta[len(delta):] = ['+' + x for x in new]
452
443
453 if hunk:
444 if hunk:
454 if not has_hunks:
445 if not has_hunks:
455 has_hunks = True
446 has_hunks = True
456 yield True
447 yield True
457 for x in yieldhunk(hunk):
448 for x in yieldhunk(hunk):
458 yield x
449 yield x
459 elif not has_hunks:
450 elif not has_hunks:
460 yield False
451 yield False
461
452
462 def b85diff(to, tn):
453 def b85diff(to, tn):
463 '''print base85-encoded binary diff'''
454 '''print base85-encoded binary diff'''
464 def fmtline(line):
455 def fmtline(line):
465 l = len(line)
456 l = len(line)
466 if l <= 26:
457 if l <= 26:
467 l = pycompat.bytechr(ord('A') + l - 1)
458 l = pycompat.bytechr(ord('A') + l - 1)
468 else:
459 else:
469 l = pycompat.bytechr(l - 26 + ord('a') - 1)
460 l = pycompat.bytechr(l - 26 + ord('a') - 1)
470 return '%c%s\n' % (l, util.b85encode(line, True))
461 return '%c%s\n' % (l, util.b85encode(line, True))
471
462
472 def chunk(text, csize=52):
463 def chunk(text, csize=52):
473 l = len(text)
464 l = len(text)
474 i = 0
465 i = 0
475 while i < l:
466 while i < l:
476 yield text[i:i + csize]
467 yield text[i:i + csize]
477 i += csize
468 i += csize
478
469
479 if to is None:
470 if to is None:
480 to = ''
471 to = ''
481 if tn is None:
472 if tn is None:
482 tn = ''
473 tn = ''
483
474
484 if to == tn:
475 if to == tn:
485 return ''
476 return ''
486
477
487 # TODO: deltas
478 # TODO: deltas
488 ret = []
479 ret = []
489 ret.append('GIT binary patch\n')
480 ret.append('GIT binary patch\n')
490 ret.append('literal %d\n' % len(tn))
481 ret.append('literal %d\n' % len(tn))
491 for l in chunk(zlib.compress(tn)):
482 for l in chunk(zlib.compress(tn)):
492 ret.append(fmtline(l))
483 ret.append(fmtline(l))
493 ret.append('\n')
484 ret.append('\n')
494
485
495 return ''.join(ret)
486 return ''.join(ret)
496
487
497 def patchtext(bin):
488 def patchtext(bin):
498 pos = 0
489 pos = 0
499 t = []
490 t = []
500 while pos < len(bin):
491 while pos < len(bin):
501 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
492 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
502 pos += 12
493 pos += 12
503 t.append(bin[pos:pos + l])
494 t.append(bin[pos:pos + l])
504 pos += l
495 pos += l
505 return "".join(t)
496 return "".join(t)
506
497
507 def patch(a, bin):
498 def patch(a, bin):
508 if len(a) == 0:
499 if len(a) == 0:
509 # skip over trivial delta header
500 # skip over trivial delta header
510 return util.buffer(bin, 12)
501 return util.buffer(bin, 12)
511 return mpatch.patches(a, [bin])
502 return mpatch.patches(a, [bin])
512
503
513 # similar to difflib.SequenceMatcher.get_matching_blocks
504 # similar to difflib.SequenceMatcher.get_matching_blocks
514 def get_matching_blocks(a, b):
505 def get_matching_blocks(a, b):
515 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
506 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
516
507
517 def trivialdiffheader(length):
508 def trivialdiffheader(length):
518 return struct.pack(">lll", 0, 0, length) if length else ''
509 return struct.pack(">lll", 0, 0, length) if length else ''
519
510
520 def replacediffheader(oldlen, newlen):
511 def replacediffheader(oldlen, newlen):
521 return struct.pack(">lll", 0, oldlen, newlen)
512 return struct.pack(">lll", 0, oldlen, newlen)
@@ -1,116 +1,116 b''
1 # policy.py - module policy logic for Mercurial.
1 # policy.py - module policy logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11 import sys
11 import sys
12
12
13 # Rules for how modules can be loaded. Values are:
13 # Rules for how modules can be loaded. Values are:
14 #
14 #
15 # c - require C extensions
15 # c - require C extensions
16 # allow - allow pure Python implementation when C loading fails
16 # allow - allow pure Python implementation when C loading fails
17 # cffi - required cffi versions (implemented within pure module)
17 # cffi - required cffi versions (implemented within pure module)
18 # cffi-allow - allow pure Python implementation if cffi version is missing
18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 # py - only load pure Python modules
19 # py - only load pure Python modules
20 #
20 #
21 # By default, fall back to the pure modules so the in-place build can
21 # By default, fall back to the pure modules so the in-place build can
22 # run without recompiling the C extensions. This will be overridden by
22 # run without recompiling the C extensions. This will be overridden by
23 # __modulepolicy__ generated by setup.py.
23 # __modulepolicy__ generated by setup.py.
24 policy = b'allow'
24 policy = b'allow'
25 _packageprefs = {
25 _packageprefs = {
26 # policy: (versioned package, pure package)
26 # policy: (versioned package, pure package)
27 b'c': (r'cext', None),
27 b'c': (r'cext', None),
28 b'allow': (r'cext', r'pure'),
28 b'allow': (r'cext', r'pure'),
29 b'cffi': (r'cffi', None),
29 b'cffi': (r'cffi', None),
30 b'cffi-allow': (r'cffi', r'pure'),
30 b'cffi-allow': (r'cffi', r'pure'),
31 b'py': (None, r'pure'),
31 b'py': (None, r'pure'),
32 }
32 }
33
33
34 try:
34 try:
35 from . import __modulepolicy__
35 from . import __modulepolicy__
36 policy = __modulepolicy__.modulepolicy
36 policy = __modulepolicy__.modulepolicy
37 except ImportError:
37 except ImportError:
38 pass
38 pass
39
39
40 # PyPy doesn't load C extensions.
40 # PyPy doesn't load C extensions.
41 #
41 #
42 # The canonical way to do this is to test platform.python_implementation().
42 # The canonical way to do this is to test platform.python_implementation().
43 # But we don't import platform and don't bloat for it here.
43 # But we don't import platform and don't bloat for it here.
44 if r'__pypy__' in sys.builtin_module_names:
44 if r'__pypy__' in sys.builtin_module_names:
45 policy = b'cffi'
45 policy = b'cffi'
46
46
47 # Our C extensions aren't yet compatible with Python 3. So use pure Python
47 # Our C extensions aren't yet compatible with Python 3. So use pure Python
48 # on Python 3 for now.
48 # on Python 3 for now.
49 if sys.version_info[0] >= 3:
49 if sys.version_info[0] >= 3:
50 policy = b'py'
50 policy = b'py'
51
51
52 # Environment variable can always force settings.
52 # Environment variable can always force settings.
53 if sys.version_info[0] >= 3:
53 if sys.version_info[0] >= 3:
54 if r'HGMODULEPOLICY' in os.environ:
54 if r'HGMODULEPOLICY' in os.environ:
55 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
55 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
56 else:
56 else:
57 policy = os.environ.get(r'HGMODULEPOLICY', policy)
57 policy = os.environ.get(r'HGMODULEPOLICY', policy)
58
58
59 def _importfrom(pkgname, modname):
59 def _importfrom(pkgname, modname):
60 # from .<pkgname> import <modname> (where . is looked through this module)
60 # from .<pkgname> import <modname> (where . is looked through this module)
61 fakelocals = {}
61 fakelocals = {}
62 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
62 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
63 try:
63 try:
64 fakelocals[modname] = mod = getattr(pkg, modname)
64 fakelocals[modname] = mod = getattr(pkg, modname)
65 except AttributeError:
65 except AttributeError:
66 raise ImportError(r'cannot import name %s' % modname)
66 raise ImportError(r'cannot import name %s' % modname)
67 # force import; fakelocals[modname] may be replaced with the real module
67 # force import; fakelocals[modname] may be replaced with the real module
68 getattr(mod, r'__doc__', None)
68 getattr(mod, r'__doc__', None)
69 return fakelocals[modname]
69 return fakelocals[modname]
70
70
71 # keep in sync with "version" in C modules
71 # keep in sync with "version" in C modules
72 _cextversions = {
72 _cextversions = {
73 (r'cext', r'base85'): 1,
73 (r'cext', r'base85'): 1,
74 (r'cext', r'bdiff'): 1,
74 (r'cext', r'bdiff'): 2,
75 (r'cext', r'diffhelpers'): 1,
75 (r'cext', r'diffhelpers'): 1,
76 (r'cext', r'mpatch'): 1,
76 (r'cext', r'mpatch'): 1,
77 (r'cext', r'osutil'): 3,
77 (r'cext', r'osutil'): 3,
78 (r'cext', r'parsers'): 4,
78 (r'cext', r'parsers'): 4,
79 }
79 }
80
80
81 # map import request to other package or module
81 # map import request to other package or module
82 _modredirects = {
82 _modredirects = {
83 (r'cext', r'charencode'): (r'cext', r'parsers'),
83 (r'cext', r'charencode'): (r'cext', r'parsers'),
84 (r'cffi', r'base85'): (r'pure', r'base85'),
84 (r'cffi', r'base85'): (r'pure', r'base85'),
85 (r'cffi', r'charencode'): (r'pure', r'charencode'),
85 (r'cffi', r'charencode'): (r'pure', r'charencode'),
86 (r'cffi', r'diffhelpers'): (r'pure', r'diffhelpers'),
86 (r'cffi', r'diffhelpers'): (r'pure', r'diffhelpers'),
87 (r'cffi', r'parsers'): (r'pure', r'parsers'),
87 (r'cffi', r'parsers'): (r'pure', r'parsers'),
88 }
88 }
89
89
90 def _checkmod(pkgname, modname, mod):
90 def _checkmod(pkgname, modname, mod):
91 expected = _cextversions.get((pkgname, modname))
91 expected = _cextversions.get((pkgname, modname))
92 actual = getattr(mod, r'version', None)
92 actual = getattr(mod, r'version', None)
93 if actual != expected:
93 if actual != expected:
94 raise ImportError(r'cannot import module %s.%s '
94 raise ImportError(r'cannot import module %s.%s '
95 r'(expected version: %d, actual: %r)'
95 r'(expected version: %d, actual: %r)'
96 % (pkgname, modname, expected, actual))
96 % (pkgname, modname, expected, actual))
97
97
98 def importmod(modname):
98 def importmod(modname):
99 """Import module according to policy and check API version"""
99 """Import module according to policy and check API version"""
100 try:
100 try:
101 verpkg, purepkg = _packageprefs[policy]
101 verpkg, purepkg = _packageprefs[policy]
102 except KeyError:
102 except KeyError:
103 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
103 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
104 assert verpkg or purepkg
104 assert verpkg or purepkg
105 if verpkg:
105 if verpkg:
106 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
106 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
107 try:
107 try:
108 mod = _importfrom(pn, mn)
108 mod = _importfrom(pn, mn)
109 if pn == verpkg:
109 if pn == verpkg:
110 _checkmod(pn, mn, mod)
110 _checkmod(pn, mn, mod)
111 return mod
111 return mod
112 except ImportError:
112 except ImportError:
113 if not purepkg:
113 if not purepkg:
114 raise
114 raise
115 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
115 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
116 return _importfrom(pn, mn)
116 return _importfrom(pn, mn)
@@ -1,92 +1,102 b''
1 # bdiff.py - Python implementation of bdiff.c
1 # bdiff.py - Python implementation of bdiff.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import difflib
10 import difflib
11 import re
11 import re
12 import struct
12 import struct
13
13
14 def splitnewlines(text):
14 def splitnewlines(text):
15 '''like str.splitlines, but only split on newlines.'''
15 '''like str.splitlines, but only split on newlines.'''
16 lines = [l + '\n' for l in text.split('\n')]
16 lines = [l + '\n' for l in text.split('\n')]
17 if lines:
17 if lines:
18 if lines[-1] == '\n':
18 if lines[-1] == '\n':
19 lines.pop()
19 lines.pop()
20 else:
20 else:
21 lines[-1] = lines[-1][:-1]
21 lines[-1] = lines[-1][:-1]
22 return lines
22 return lines
23
23
24 def _normalizeblocks(a, b, blocks):
24 def _normalizeblocks(a, b, blocks):
25 prev = None
25 prev = None
26 r = []
26 r = []
27 for curr in blocks:
27 for curr in blocks:
28 if prev is None:
28 if prev is None:
29 prev = curr
29 prev = curr
30 continue
30 continue
31 shift = 0
31 shift = 0
32
32
33 a1, b1, l1 = prev
33 a1, b1, l1 = prev
34 a1end = a1 + l1
34 a1end = a1 + l1
35 b1end = b1 + l1
35 b1end = b1 + l1
36
36
37 a2, b2, l2 = curr
37 a2, b2, l2 = curr
38 a2end = a2 + l2
38 a2end = a2 + l2
39 b2end = b2 + l2
39 b2end = b2 + l2
40 if a1end == a2:
40 if a1end == a2:
41 while (a1end + shift < a2end and
41 while (a1end + shift < a2end and
42 a[a1end + shift] == b[b1end + shift]):
42 a[a1end + shift] == b[b1end + shift]):
43 shift += 1
43 shift += 1
44 elif b1end == b2:
44 elif b1end == b2:
45 while (b1end + shift < b2end and
45 while (b1end + shift < b2end and
46 a[a1end + shift] == b[b1end + shift]):
46 a[a1end + shift] == b[b1end + shift]):
47 shift += 1
47 shift += 1
48 r.append((a1, b1, l1 + shift))
48 r.append((a1, b1, l1 + shift))
49 prev = a2 + shift, b2 + shift, l2 - shift
49 prev = a2 + shift, b2 + shift, l2 - shift
50 r.append(prev)
50 r.append(prev)
51 return r
51 return r
52
52
53 def bdiff(a, b):
53 def bdiff(a, b):
54 a = bytes(a).splitlines(True)
54 a = bytes(a).splitlines(True)
55 b = bytes(b).splitlines(True)
55 b = bytes(b).splitlines(True)
56
56
57 if not a:
57 if not a:
58 s = "".join(b)
58 s = "".join(b)
59 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
59 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
60
60
61 bin = []
61 bin = []
62 p = [0]
62 p = [0]
63 for i in a:
63 for i in a:
64 p.append(p[-1] + len(i))
64 p.append(p[-1] + len(i))
65
65
66 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
66 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
67 d = _normalizeblocks(a, b, d)
67 d = _normalizeblocks(a, b, d)
68 la = 0
68 la = 0
69 lb = 0
69 lb = 0
70 for am, bm, size in d:
70 for am, bm, size in d:
71 s = "".join(b[lb:bm])
71 s = "".join(b[lb:bm])
72 if am > la or s:
72 if am > la or s:
73 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
73 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
74 la = am + size
74 la = am + size
75 lb = bm + size
75 lb = bm + size
76
76
77 return "".join(bin)
77 return "".join(bin)
78
78
79 def blocks(a, b):
79 def blocks(a, b):
80 an = splitnewlines(a)
80 an = splitnewlines(a)
81 bn = splitnewlines(b)
81 bn = splitnewlines(b)
82 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
82 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
83 d = _normalizeblocks(an, bn, d)
83 d = _normalizeblocks(an, bn, d)
84 return [(i, i + n, j, j + n) for (i, j, n) in d]
84 return [(i, i + n, j, j + n) for (i, j, n) in d]
85
85
86 def fixws(text, allws):
86 def fixws(text, allws):
87 if allws:
87 if allws:
88 text = re.sub('[ \t\r]+', '', text)
88 text = re.sub('[ \t\r]+', '', text)
89 else:
89 else:
90 text = re.sub('[ \t\r]+', ' ', text)
90 text = re.sub('[ \t\r]+', ' ', text)
91 text = text.replace(' \n', '\n')
91 text = text.replace(' \n', '\n')
92 return text
92 return text
93
94 def splitnewlines(text):
95 '''like str.splitlines, but only split on newlines.'''
96 lines = [l + '\n' for l in text.split('\n')]
97 if lines:
98 if lines[-1] == '\n':
99 lines.pop()
100 else:
101 lines[-1] = lines[-1][:-1]
102 return lines
General Comments 0
You need to be logged in to leave comments. Login now