##// END OF EJS Templates
cext: stop worrying and love the free(NULL)...
Josef 'Jeff' Sipek -
r38320:d9e87566 stable
parent child Browse files
Show More
@@ -1,346 +1,342 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stdlib.h>
15 #include <stdlib.h>
16 #include <string.h>
16 #include <string.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "thirdparty/xdiff/xdiff.h"
20 #include "thirdparty/xdiff/xdiff.h"
21 #include "util.h"
21 #include "util.h"
22
22
23 static PyObject *blocks(PyObject *self, PyObject *args)
23 static PyObject *blocks(PyObject *self, PyObject *args)
24 {
24 {
25 PyObject *sa, *sb, *rl = NULL, *m;
25 PyObject *sa, *sb, *rl = NULL, *m;
26 struct bdiff_line *a, *b;
26 struct bdiff_line *a, *b;
27 struct bdiff_hunk l, *h;
27 struct bdiff_hunk l, *h;
28 int an, bn, count, pos = 0;
28 int an, bn, count, pos = 0;
29
29
30 l.next = NULL;
30 l.next = NULL;
31
31
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 return NULL;
33 return NULL;
34
34
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37
37
38 if (!a || !b)
38 if (!a || !b)
39 goto nomem;
39 goto nomem;
40
40
41 count = bdiff_diff(a, an, b, bn, &l);
41 count = bdiff_diff(a, an, b, bn, &l);
42 if (count < 0)
42 if (count < 0)
43 goto nomem;
43 goto nomem;
44
44
45 rl = PyList_New(count);
45 rl = PyList_New(count);
46 if (!rl)
46 if (!rl)
47 goto nomem;
47 goto nomem;
48
48
49 for (h = l.next; h; h = h->next) {
49 for (h = l.next; h; h = h->next) {
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 PyList_SetItem(rl, pos, m);
51 PyList_SetItem(rl, pos, m);
52 pos++;
52 pos++;
53 }
53 }
54
54
55 nomem:
55 nomem:
56 free(a);
56 free(a);
57 free(b);
57 free(b);
58 bdiff_freehunks(l.next);
58 bdiff_freehunks(l.next);
59 return rl ? rl : PyErr_NoMemory();
59 return rl ? rl : PyErr_NoMemory();
60 }
60 }
61
61
62 static PyObject *bdiff(PyObject *self, PyObject *args)
62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 {
63 {
64 Py_buffer ba, bb;
64 Py_buffer ba, bb;
65 char *rb, *ia, *ib;
65 char *rb, *ia, *ib;
66 PyObject *result = NULL;
66 PyObject *result = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
68 struct bdiff_hunk l, *h;
68 struct bdiff_hunk l, *h;
69 int an, bn, count;
69 int an, bn, count;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
71 PyThreadState *_save = NULL;
71 PyThreadState *_save = NULL;
72
72
73 l.next = NULL;
73 l.next = NULL;
74
74
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
76 return NULL;
76 return NULL;
77
77
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
80 goto cleanup;
80 goto cleanup;
81 }
81 }
82
82
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
85 goto cleanup;
85 goto cleanup;
86 }
86 }
87
87
88 la = ba.len;
88 la = ba.len;
89 lb = bb.len;
89 lb = bb.len;
90
90
91 if (la > UINT_MAX || lb > UINT_MAX) {
91 if (la > UINT_MAX || lb > UINT_MAX) {
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
93 goto cleanup;
93 goto cleanup;
94 }
94 }
95
95
96 _save = PyEval_SaveThread();
96 _save = PyEval_SaveThread();
97
97
98 lmax = la > lb ? lb : la;
98 lmax = la > lb ? lb : la;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
100 ++li, ++ia, ++ib) {
100 ++li, ++ia, ++ib) {
101 if (*ia == '\n')
101 if (*ia == '\n')
102 lcommon = li + 1;
102 lcommon = li + 1;
103 }
103 }
104 /* we can almost add: if (li == lmax) lcommon = li; */
104 /* we can almost add: if (li == lmax) lcommon = li; */
105
105
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
108 if (!al || !bl) {
108 if (!al || !bl) {
109 PyErr_NoMemory();
109 PyErr_NoMemory();
110 goto cleanup;
110 goto cleanup;
111 }
111 }
112
112
113 count = bdiff_diff(al, an, bl, bn, &l);
113 count = bdiff_diff(al, an, bl, bn, &l);
114 if (count < 0) {
114 if (count < 0) {
115 PyErr_NoMemory();
115 PyErr_NoMemory();
116 goto cleanup;
116 goto cleanup;
117 }
117 }
118
118
119 /* calculate length of output */
119 /* calculate length of output */
120 la = lb = 0;
120 la = lb = 0;
121 for (h = l.next; h; h = h->next) {
121 for (h = l.next; h; h = h->next) {
122 if (h->a1 != la || h->b1 != lb)
122 if (h->a1 != la || h->b1 != lb)
123 len += 12 + bl[h->b1].l - bl[lb].l;
123 len += 12 + bl[h->b1].l - bl[lb].l;
124 la = h->a2;
124 la = h->a2;
125 lb = h->b2;
125 lb = h->b2;
126 }
126 }
127 PyEval_RestoreThread(_save);
127 PyEval_RestoreThread(_save);
128 _save = NULL;
128 _save = NULL;
129
129
130 result = PyBytes_FromStringAndSize(NULL, len);
130 result = PyBytes_FromStringAndSize(NULL, len);
131
131
132 if (!result)
132 if (!result)
133 goto cleanup;
133 goto cleanup;
134
134
135 /* build binary patch */
135 /* build binary patch */
136 rb = PyBytes_AsString(result);
136 rb = PyBytes_AsString(result);
137 la = lb = 0;
137 la = lb = 0;
138
138
139 for (h = l.next; h; h = h->next) {
139 for (h = l.next; h; h = h->next) {
140 if (h->a1 != la || h->b1 != lb) {
140 if (h->a1 != la || h->b1 != lb) {
141 len = bl[h->b1].l - bl[lb].l;
141 len = bl[h->b1].l - bl[lb].l;
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
144 rb + 4);
144 rb + 4);
145 putbe32((uint32_t)len, rb + 8);
145 putbe32((uint32_t)len, rb + 8);
146 memcpy(rb + 12, bl[lb].l, len);
146 memcpy(rb + 12, bl[lb].l, len);
147 rb += 12 + len;
147 rb += 12 + len;
148 }
148 }
149 la = h->a2;
149 la = h->a2;
150 lb = h->b2;
150 lb = h->b2;
151 }
151 }
152
152
153 cleanup:
153 cleanup:
154 if (_save)
154 if (_save)
155 PyEval_RestoreThread(_save);
155 PyEval_RestoreThread(_save);
156 PyBuffer_Release(&ba);
156 PyBuffer_Release(&ba);
157 PyBuffer_Release(&bb);
157 PyBuffer_Release(&bb);
158 if (al) {
158 free(al);
159 free(al);
159 free(bl);
160 }
161 if (bl) {
162 free(bl);
163 }
164 if (l.next) {
160 if (l.next) {
165 bdiff_freehunks(l.next);
161 bdiff_freehunks(l.next);
166 }
162 }
167 return result;
163 return result;
168 }
164 }
169
165
170 /*
166 /*
171 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
167 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
172 * reduce whitespace sequences to a single space and trim remaining whitespace
168 * reduce whitespace sequences to a single space and trim remaining whitespace
173 * from end of lines.
169 * from end of lines.
174 */
170 */
175 static PyObject *fixws(PyObject *self, PyObject *args)
171 static PyObject *fixws(PyObject *self, PyObject *args)
176 {
172 {
177 PyObject *s, *result = NULL;
173 PyObject *s, *result = NULL;
178 char allws, c;
174 char allws, c;
179 const char *r;
175 const char *r;
180 Py_ssize_t i, rlen, wlen = 0;
176 Py_ssize_t i, rlen, wlen = 0;
181 char *w;
177 char *w;
182
178
183 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
179 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
184 return NULL;
180 return NULL;
185 r = PyBytes_AsString(s);
181 r = PyBytes_AsString(s);
186 rlen = PyBytes_Size(s);
182 rlen = PyBytes_Size(s);
187
183
188 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
184 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
189 if (!w)
185 if (!w)
190 goto nomem;
186 goto nomem;
191
187
192 for (i = 0; i != rlen; i++) {
188 for (i = 0; i != rlen; i++) {
193 c = r[i];
189 c = r[i];
194 if (c == ' ' || c == '\t' || c == '\r') {
190 if (c == ' ' || c == '\t' || c == '\r') {
195 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
191 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
196 w[wlen++] = ' ';
192 w[wlen++] = ' ';
197 } else if (c == '\n' && !allws && wlen > 0 &&
193 } else if (c == '\n' && !allws && wlen > 0 &&
198 w[wlen - 1] == ' ') {
194 w[wlen - 1] == ' ') {
199 w[wlen - 1] = '\n';
195 w[wlen - 1] = '\n';
200 } else {
196 } else {
201 w[wlen++] = c;
197 w[wlen++] = c;
202 }
198 }
203 }
199 }
204
200
205 result = PyBytes_FromStringAndSize(w, wlen);
201 result = PyBytes_FromStringAndSize(w, wlen);
206
202
207 nomem:
203 nomem:
208 PyMem_Free(w);
204 PyMem_Free(w);
209 return result ? result : PyErr_NoMemory();
205 return result ? result : PyErr_NoMemory();
210 }
206 }
211
207
212 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
208 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
213 const char *source, Py_ssize_t len)
209 const char *source, Py_ssize_t len)
214 {
210 {
215 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
211 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
216 if (sliced == NULL)
212 if (sliced == NULL)
217 return false;
213 return false;
218 PyList_SET_ITEM(list, destidx, sliced);
214 PyList_SET_ITEM(list, destidx, sliced);
219 return true;
215 return true;
220 }
216 }
221
217
222 static PyObject *splitnewlines(PyObject *self, PyObject *args)
218 static PyObject *splitnewlines(PyObject *self, PyObject *args)
223 {
219 {
224 const char *text;
220 const char *text;
225 Py_ssize_t nelts = 0, size, i, start = 0;
221 Py_ssize_t nelts = 0, size, i, start = 0;
226 PyObject *result = NULL;
222 PyObject *result = NULL;
227
223
228 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
224 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
229 goto abort;
225 goto abort;
230 }
226 }
231 if (!size) {
227 if (!size) {
232 return PyList_New(0);
228 return PyList_New(0);
233 }
229 }
234 /* This loops to size-1 because if the last byte is a newline,
230 /* This loops to size-1 because if the last byte is a newline,
235 * we don't want to perform a split there. */
231 * we don't want to perform a split there. */
236 for (i = 0; i < size - 1; ++i) {
232 for (i = 0; i < size - 1; ++i) {
237 if (text[i] == '\n') {
233 if (text[i] == '\n') {
238 ++nelts;
234 ++nelts;
239 }
235 }
240 }
236 }
241 if ((result = PyList_New(nelts + 1)) == NULL)
237 if ((result = PyList_New(nelts + 1)) == NULL)
242 goto abort;
238 goto abort;
243 nelts = 0;
239 nelts = 0;
244 for (i = 0; i < size - 1; ++i) {
240 for (i = 0; i < size - 1; ++i) {
245 if (text[i] == '\n') {
241 if (text[i] == '\n') {
246 if (!sliceintolist(result, nelts++, text + start,
242 if (!sliceintolist(result, nelts++, text + start,
247 i - start + 1))
243 i - start + 1))
248 goto abort;
244 goto abort;
249 start = i + 1;
245 start = i + 1;
250 }
246 }
251 }
247 }
252 if (!sliceintolist(result, nelts++, text + start, size - start))
248 if (!sliceintolist(result, nelts++, text + start, size - start))
253 goto abort;
249 goto abort;
254 return result;
250 return result;
255 abort:
251 abort:
256 Py_XDECREF(result);
252 Py_XDECREF(result);
257 return NULL;
253 return NULL;
258 }
254 }
259
255
260 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
256 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
261 void *priv)
257 void *priv)
262 {
258 {
263 PyObject *rl = (PyObject *)priv;
259 PyObject *rl = (PyObject *)priv;
264 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
260 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
265 if (!m)
261 if (!m)
266 return -1;
262 return -1;
267 if (PyList_Append(rl, m) != 0) {
263 if (PyList_Append(rl, m) != 0) {
268 Py_DECREF(m);
264 Py_DECREF(m);
269 return -1;
265 return -1;
270 }
266 }
271 return 0;
267 return 0;
272 }
268 }
273
269
274 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
270 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
275 {
271 {
276 Py_ssize_t la, lb;
272 Py_ssize_t la, lb;
277 mmfile_t a, b;
273 mmfile_t a, b;
278 PyObject *rl;
274 PyObject *rl;
279
275
280 xpparam_t xpp = {
276 xpparam_t xpp = {
281 XDF_INDENT_HEURISTIC, /* flags */
277 XDF_INDENT_HEURISTIC, /* flags */
282 };
278 };
283 xdemitconf_t xecfg = {
279 xdemitconf_t xecfg = {
284 XDL_EMIT_BDIFFHUNK, /* flags */
280 XDL_EMIT_BDIFFHUNK, /* flags */
285 hunk_consumer, /* hunk_consume_func */
281 hunk_consumer, /* hunk_consume_func */
286 };
282 };
287 xdemitcb_t ecb = {
283 xdemitcb_t ecb = {
288 NULL, /* priv */
284 NULL, /* priv */
289 };
285 };
290
286
291 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
287 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
292 &lb))
288 &lb))
293 return NULL;
289 return NULL;
294
290
295 a.size = la;
291 a.size = la;
296 b.size = lb;
292 b.size = lb;
297
293
298 rl = PyList_New(0);
294 rl = PyList_New(0);
299 if (!rl)
295 if (!rl)
300 return PyErr_NoMemory();
296 return PyErr_NoMemory();
301
297
302 ecb.priv = rl;
298 ecb.priv = rl;
303
299
304 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
300 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
305 Py_DECREF(rl);
301 Py_DECREF(rl);
306 return PyErr_NoMemory();
302 return PyErr_NoMemory();
307 }
303 }
308
304
309 return rl;
305 return rl;
310 }
306 }
311
307
312 static char mdiff_doc[] = "Efficient binary diff.";
308 static char mdiff_doc[] = "Efficient binary diff.";
313
309
314 static PyMethodDef methods[] = {
310 static PyMethodDef methods[] = {
315 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
311 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
316 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
312 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
317 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
313 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
318 {"splitnewlines", splitnewlines, METH_VARARGS,
314 {"splitnewlines", splitnewlines, METH_VARARGS,
319 "like str.splitlines, but only split on newlines\n"},
315 "like str.splitlines, but only split on newlines\n"},
320 {"xdiffblocks", xdiffblocks, METH_VARARGS,
316 {"xdiffblocks", xdiffblocks, METH_VARARGS,
321 "find a list of matching lines using xdiff algorithm\n"},
317 "find a list of matching lines using xdiff algorithm\n"},
322 {NULL, NULL},
318 {NULL, NULL},
323 };
319 };
324
320
325 static const int version = 3;
321 static const int version = 3;
326
322
327 #ifdef IS_PY3K
323 #ifdef IS_PY3K
328 static struct PyModuleDef bdiff_module = {
324 static struct PyModuleDef bdiff_module = {
329 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
325 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
330 };
326 };
331
327
332 PyMODINIT_FUNC PyInit_bdiff(void)
328 PyMODINIT_FUNC PyInit_bdiff(void)
333 {
329 {
334 PyObject *m;
330 PyObject *m;
335 m = PyModule_Create(&bdiff_module);
331 m = PyModule_Create(&bdiff_module);
336 PyModule_AddIntConstant(m, "version", version);
332 PyModule_AddIntConstant(m, "version", version);
337 return m;
333 return m;
338 }
334 }
339 #else
335 #else
340 PyMODINIT_FUNC initbdiff(void)
336 PyMODINIT_FUNC initbdiff(void)
341 {
337 {
342 PyObject *m;
338 PyObject *m;
343 m = Py_InitModule3("bdiff", methods, mdiff_doc);
339 m = Py_InitModule3("bdiff", methods, mdiff_doc);
344 PyModule_AddIntConstant(m, "version", version);
340 PyModule_AddIntConstant(m, "version", version);
345 }
341 }
346 #endif
342 #endif
@@ -1,942 +1,940 b''
1 /*
1 /*
2 * manifest.c - manifest type that does on-demand parsing.
2 * manifest.c - manifest type that does on-demand parsing.
3 *
3 *
4 * Copyright 2015, Google Inc.
4 * Copyright 2015, Google Inc.
5 *
5 *
6 * This software may be used and distributed according to the terms of
6 * This software may be used and distributed according to the terms of
7 * the GNU General Public License, incorporated herein by reference.
7 * the GNU General Public License, incorporated herein by reference.
8 */
8 */
9 #include <Python.h>
9 #include <Python.h>
10
10
11 #include <assert.h>
11 #include <assert.h>
12 #include <stdlib.h>
12 #include <stdlib.h>
13 #include <string.h>
13 #include <string.h>
14
14
15 #include "charencode.h"
15 #include "charencode.h"
16 #include "util.h"
16 #include "util.h"
17
17
18 #define DEFAULT_LINES 100000
18 #define DEFAULT_LINES 100000
19
19
20 typedef struct {
20 typedef struct {
21 char *start;
21 char *start;
22 Py_ssize_t len; /* length of line including terminal newline */
22 Py_ssize_t len; /* length of line including terminal newline */
23 char hash_suffix;
23 char hash_suffix;
24 bool from_malloc;
24 bool from_malloc;
25 bool deleted;
25 bool deleted;
26 } line;
26 } line;
27
27
28 typedef struct {
28 typedef struct {
29 PyObject_HEAD
29 PyObject_HEAD
30 PyObject *pydata;
30 PyObject *pydata;
31 line *lines;
31 line *lines;
32 int numlines; /* number of line entries */
32 int numlines; /* number of line entries */
33 int livelines; /* number of non-deleted lines */
33 int livelines; /* number of non-deleted lines */
34 int maxlines; /* allocated number of lines */
34 int maxlines; /* allocated number of lines */
35 bool dirty;
35 bool dirty;
36 } lazymanifest;
36 } lazymanifest;
37
37
38 #define MANIFEST_OOM -1
38 #define MANIFEST_OOM -1
39 #define MANIFEST_NOT_SORTED -2
39 #define MANIFEST_NOT_SORTED -2
40 #define MANIFEST_MALFORMED -3
40 #define MANIFEST_MALFORMED -3
41
41
42 /* get the length of the path for a line */
42 /* get the length of the path for a line */
43 static size_t pathlen(line *l)
43 static size_t pathlen(line *l)
44 {
44 {
45 return strlen(l->start);
45 return strlen(l->start);
46 }
46 }
47
47
48 /* get the node value of a single line */
48 /* get the node value of a single line */
49 static PyObject *nodeof(line *l)
49 static PyObject *nodeof(line *l)
50 {
50 {
51 char *s = l->start;
51 char *s = l->start;
52 ssize_t llen = pathlen(l);
52 ssize_t llen = pathlen(l);
53 PyObject *hash = unhexlify(s + llen + 1, 40);
53 PyObject *hash = unhexlify(s + llen + 1, 40);
54 if (!hash) {
54 if (!hash) {
55 return NULL;
55 return NULL;
56 }
56 }
57 if (l->hash_suffix != '\0') {
57 if (l->hash_suffix != '\0') {
58 char newhash[21];
58 char newhash[21];
59 memcpy(newhash, PyBytes_AsString(hash), 20);
59 memcpy(newhash, PyBytes_AsString(hash), 20);
60 Py_DECREF(hash);
60 Py_DECREF(hash);
61 newhash[20] = l->hash_suffix;
61 newhash[20] = l->hash_suffix;
62 hash = PyBytes_FromStringAndSize(newhash, 21);
62 hash = PyBytes_FromStringAndSize(newhash, 21);
63 }
63 }
64 return hash;
64 return hash;
65 }
65 }
66
66
67 /* get the node hash and flags of a line as a tuple */
67 /* get the node hash and flags of a line as a tuple */
68 static PyObject *hashflags(line *l)
68 static PyObject *hashflags(line *l)
69 {
69 {
70 char *s = l->start;
70 char *s = l->start;
71 size_t plen = pathlen(l);
71 size_t plen = pathlen(l);
72 PyObject *hash = nodeof(l);
72 PyObject *hash = nodeof(l);
73
73
74 /* 40 for hash, 1 for null byte, 1 for newline */
74 /* 40 for hash, 1 for null byte, 1 for newline */
75 size_t hplen = plen + 42;
75 size_t hplen = plen + 42;
76 Py_ssize_t flen = l->len - hplen;
76 Py_ssize_t flen = l->len - hplen;
77 PyObject *flags;
77 PyObject *flags;
78 PyObject *tup;
78 PyObject *tup;
79
79
80 if (!hash)
80 if (!hash)
81 return NULL;
81 return NULL;
82 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
82 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
83 if (!flags) {
83 if (!flags) {
84 Py_DECREF(hash);
84 Py_DECREF(hash);
85 return NULL;
85 return NULL;
86 }
86 }
87 tup = PyTuple_Pack(2, hash, flags);
87 tup = PyTuple_Pack(2, hash, flags);
88 Py_DECREF(flags);
88 Py_DECREF(flags);
89 Py_DECREF(hash);
89 Py_DECREF(hash);
90 return tup;
90 return tup;
91 }
91 }
92
92
93 /* if we're about to run out of space in the line index, add more */
93 /* if we're about to run out of space in the line index, add more */
94 static bool realloc_if_full(lazymanifest *self)
94 static bool realloc_if_full(lazymanifest *self)
95 {
95 {
96 if (self->numlines == self->maxlines) {
96 if (self->numlines == self->maxlines) {
97 self->maxlines *= 2;
97 self->maxlines *= 2;
98 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
98 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
99 }
99 }
100 return !!self->lines;
100 return !!self->lines;
101 }
101 }
102
102
103 /*
103 /*
104 * Find the line boundaries in the manifest that 'data' points to and store
104 * Find the line boundaries in the manifest that 'data' points to and store
105 * information about each line in 'self'.
105 * information about each line in 'self'.
106 */
106 */
107 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
107 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
108 {
108 {
109 char *prev = NULL;
109 char *prev = NULL;
110 while (len > 0) {
110 while (len > 0) {
111 line *l;
111 line *l;
112 char *next = memchr(data, '\n', len);
112 char *next = memchr(data, '\n', len);
113 if (!next) {
113 if (!next) {
114 return MANIFEST_MALFORMED;
114 return MANIFEST_MALFORMED;
115 }
115 }
116 next++; /* advance past newline */
116 next++; /* advance past newline */
117 if (!realloc_if_full(self)) {
117 if (!realloc_if_full(self)) {
118 return MANIFEST_OOM; /* no memory */
118 return MANIFEST_OOM; /* no memory */
119 }
119 }
120 if (prev && strcmp(prev, data) > -1) {
120 if (prev && strcmp(prev, data) > -1) {
121 /* This data isn't sorted, so we have to abort. */
121 /* This data isn't sorted, so we have to abort. */
122 return MANIFEST_NOT_SORTED;
122 return MANIFEST_NOT_SORTED;
123 }
123 }
124 l = self->lines + ((self->numlines)++);
124 l = self->lines + ((self->numlines)++);
125 l->start = data;
125 l->start = data;
126 l->len = next - data;
126 l->len = next - data;
127 l->hash_suffix = '\0';
127 l->hash_suffix = '\0';
128 l->from_malloc = false;
128 l->from_malloc = false;
129 l->deleted = false;
129 l->deleted = false;
130 len = len - l->len;
130 len = len - l->len;
131 prev = data;
131 prev = data;
132 data = next;
132 data = next;
133 }
133 }
134 self->livelines = self->numlines;
134 self->livelines = self->numlines;
135 return 0;
135 return 0;
136 }
136 }
137
137
138 static int lazymanifest_init(lazymanifest *self, PyObject *args)
138 static int lazymanifest_init(lazymanifest *self, PyObject *args)
139 {
139 {
140 char *data;
140 char *data;
141 Py_ssize_t len;
141 Py_ssize_t len;
142 int err, ret;
142 int err, ret;
143 PyObject *pydata;
143 PyObject *pydata;
144 if (!PyArg_ParseTuple(args, "S", &pydata)) {
144 if (!PyArg_ParseTuple(args, "S", &pydata)) {
145 return -1;
145 return -1;
146 }
146 }
147 err = PyBytes_AsStringAndSize(pydata, &data, &len);
147 err = PyBytes_AsStringAndSize(pydata, &data, &len);
148
148
149 self->dirty = false;
149 self->dirty = false;
150 if (err == -1)
150 if (err == -1)
151 return -1;
151 return -1;
152 self->pydata = pydata;
152 self->pydata = pydata;
153 Py_INCREF(self->pydata);
153 Py_INCREF(self->pydata);
154 Py_BEGIN_ALLOW_THREADS
154 Py_BEGIN_ALLOW_THREADS
155 self->lines = malloc(DEFAULT_LINES * sizeof(line));
155 self->lines = malloc(DEFAULT_LINES * sizeof(line));
156 self->maxlines = DEFAULT_LINES;
156 self->maxlines = DEFAULT_LINES;
157 self->numlines = 0;
157 self->numlines = 0;
158 if (!self->lines)
158 if (!self->lines)
159 ret = MANIFEST_OOM;
159 ret = MANIFEST_OOM;
160 else
160 else
161 ret = find_lines(self, data, len);
161 ret = find_lines(self, data, len);
162 Py_END_ALLOW_THREADS
162 Py_END_ALLOW_THREADS
163 switch (ret) {
163 switch (ret) {
164 case 0:
164 case 0:
165 break;
165 break;
166 case MANIFEST_OOM:
166 case MANIFEST_OOM:
167 PyErr_NoMemory();
167 PyErr_NoMemory();
168 break;
168 break;
169 case MANIFEST_NOT_SORTED:
169 case MANIFEST_NOT_SORTED:
170 PyErr_Format(PyExc_ValueError,
170 PyErr_Format(PyExc_ValueError,
171 "Manifest lines not in sorted order.");
171 "Manifest lines not in sorted order.");
172 break;
172 break;
173 case MANIFEST_MALFORMED:
173 case MANIFEST_MALFORMED:
174 PyErr_Format(PyExc_ValueError,
174 PyErr_Format(PyExc_ValueError,
175 "Manifest did not end in a newline.");
175 "Manifest did not end in a newline.");
176 break;
176 break;
177 default:
177 default:
178 PyErr_Format(PyExc_ValueError,
178 PyErr_Format(PyExc_ValueError,
179 "Unknown problem parsing manifest.");
179 "Unknown problem parsing manifest.");
180 }
180 }
181 return ret == 0 ? 0 : -1;
181 return ret == 0 ? 0 : -1;
182 }
182 }
183
183
184 static void lazymanifest_dealloc(lazymanifest *self)
184 static void lazymanifest_dealloc(lazymanifest *self)
185 {
185 {
186 /* free any extra lines we had to allocate */
186 /* free any extra lines we had to allocate */
187 int i;
187 int i;
188 for (i = 0; i < self->numlines; i++) {
188 for (i = 0; i < self->numlines; i++) {
189 if (self->lines[i].from_malloc) {
189 if (self->lines[i].from_malloc) {
190 free(self->lines[i].start);
190 free(self->lines[i].start);
191 }
191 }
192 }
192 }
193 if (self->lines) {
193 free(self->lines);
194 free(self->lines);
194 self->lines = NULL;
195 self->lines = NULL;
196 }
197 if (self->pydata) {
195 if (self->pydata) {
198 Py_DECREF(self->pydata);
196 Py_DECREF(self->pydata);
199 self->pydata = NULL;
197 self->pydata = NULL;
200 }
198 }
201 PyObject_Del(self);
199 PyObject_Del(self);
202 }
200 }
203
201
204 /* iteration support */
202 /* iteration support */
205
203
206 typedef struct {
204 typedef struct {
207 PyObject_HEAD lazymanifest *m;
205 PyObject_HEAD lazymanifest *m;
208 Py_ssize_t pos;
206 Py_ssize_t pos;
209 } lmIter;
207 } lmIter;
210
208
211 static void lmiter_dealloc(PyObject *o)
209 static void lmiter_dealloc(PyObject *o)
212 {
210 {
213 lmIter *self = (lmIter *)o;
211 lmIter *self = (lmIter *)o;
214 Py_DECREF(self->m);
212 Py_DECREF(self->m);
215 PyObject_Del(self);
213 PyObject_Del(self);
216 }
214 }
217
215
218 static line *lmiter_nextline(lmIter *self)
216 static line *lmiter_nextline(lmIter *self)
219 {
217 {
220 do {
218 do {
221 self->pos++;
219 self->pos++;
222 if (self->pos >= self->m->numlines) {
220 if (self->pos >= self->m->numlines) {
223 return NULL;
221 return NULL;
224 }
222 }
225 /* skip over deleted manifest entries */
223 /* skip over deleted manifest entries */
226 } while (self->m->lines[self->pos].deleted);
224 } while (self->m->lines[self->pos].deleted);
227 return self->m->lines + self->pos;
225 return self->m->lines + self->pos;
228 }
226 }
229
227
230 static PyObject *lmiter_iterentriesnext(PyObject *o)
228 static PyObject *lmiter_iterentriesnext(PyObject *o)
231 {
229 {
232 size_t pl;
230 size_t pl;
233 line *l;
231 line *l;
234 Py_ssize_t consumed;
232 Py_ssize_t consumed;
235 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
233 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
236 l = lmiter_nextline((lmIter *)o);
234 l = lmiter_nextline((lmIter *)o);
237 if (!l) {
235 if (!l) {
238 goto done;
236 goto done;
239 }
237 }
240 pl = pathlen(l);
238 pl = pathlen(l);
241 path = PyBytes_FromStringAndSize(l->start, pl);
239 path = PyBytes_FromStringAndSize(l->start, pl);
242 hash = nodeof(l);
240 hash = nodeof(l);
243 consumed = pl + 41;
241 consumed = pl + 41;
244 flags = PyBytes_FromStringAndSize(l->start + consumed,
242 flags = PyBytes_FromStringAndSize(l->start + consumed,
245 l->len - consumed - 1);
243 l->len - consumed - 1);
246 if (!path || !hash || !flags) {
244 if (!path || !hash || !flags) {
247 goto done;
245 goto done;
248 }
246 }
249 ret = PyTuple_Pack(3, path, hash, flags);
247 ret = PyTuple_Pack(3, path, hash, flags);
250 done:
248 done:
251 Py_XDECREF(path);
249 Py_XDECREF(path);
252 Py_XDECREF(hash);
250 Py_XDECREF(hash);
253 Py_XDECREF(flags);
251 Py_XDECREF(flags);
254 return ret;
252 return ret;
255 }
253 }
256
254
257 #ifdef IS_PY3K
255 #ifdef IS_PY3K
258 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
256 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
259 #else
257 #else
260 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
258 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
261 | Py_TPFLAGS_HAVE_ITER
259 | Py_TPFLAGS_HAVE_ITER
262 #endif
260 #endif
263
261
264 static PyTypeObject lazymanifestEntriesIterator = {
262 static PyTypeObject lazymanifestEntriesIterator = {
265 PyVarObject_HEAD_INIT(NULL, 0) /* header */
263 PyVarObject_HEAD_INIT(NULL, 0) /* header */
266 "parsers.lazymanifest.entriesiterator", /*tp_name */
264 "parsers.lazymanifest.entriesiterator", /*tp_name */
267 sizeof(lmIter), /*tp_basicsize */
265 sizeof(lmIter), /*tp_basicsize */
268 0, /*tp_itemsize */
266 0, /*tp_itemsize */
269 lmiter_dealloc, /*tp_dealloc */
267 lmiter_dealloc, /*tp_dealloc */
270 0, /*tp_print */
268 0, /*tp_print */
271 0, /*tp_getattr */
269 0, /*tp_getattr */
272 0, /*tp_setattr */
270 0, /*tp_setattr */
273 0, /*tp_compare */
271 0, /*tp_compare */
274 0, /*tp_repr */
272 0, /*tp_repr */
275 0, /*tp_as_number */
273 0, /*tp_as_number */
276 0, /*tp_as_sequence */
274 0, /*tp_as_sequence */
277 0, /*tp_as_mapping */
275 0, /*tp_as_mapping */
278 0, /*tp_hash */
276 0, /*tp_hash */
279 0, /*tp_call */
277 0, /*tp_call */
280 0, /*tp_str */
278 0, /*tp_str */
281 0, /*tp_getattro */
279 0, /*tp_getattro */
282 0, /*tp_setattro */
280 0, /*tp_setattro */
283 0, /*tp_as_buffer */
281 0, /*tp_as_buffer */
284 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
282 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
285 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
283 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
286 0, /* tp_traverse */
284 0, /* tp_traverse */
287 0, /* tp_clear */
285 0, /* tp_clear */
288 0, /* tp_richcompare */
286 0, /* tp_richcompare */
289 0, /* tp_weaklistoffset */
287 0, /* tp_weaklistoffset */
290 PyObject_SelfIter, /* tp_iter: __iter__() method */
288 PyObject_SelfIter, /* tp_iter: __iter__() method */
291 lmiter_iterentriesnext, /* tp_iternext: next() method */
289 lmiter_iterentriesnext, /* tp_iternext: next() method */
292 };
290 };
293
291
294 static PyObject *lmiter_iterkeysnext(PyObject *o)
292 static PyObject *lmiter_iterkeysnext(PyObject *o)
295 {
293 {
296 size_t pl;
294 size_t pl;
297 line *l = lmiter_nextline((lmIter *)o);
295 line *l = lmiter_nextline((lmIter *)o);
298 if (!l) {
296 if (!l) {
299 return NULL;
297 return NULL;
300 }
298 }
301 pl = pathlen(l);
299 pl = pathlen(l);
302 return PyBytes_FromStringAndSize(l->start, pl);
300 return PyBytes_FromStringAndSize(l->start, pl);
303 }
301 }
304
302
305 #ifdef IS_PY3K
303 #ifdef IS_PY3K
306 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
304 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
307 #else
305 #else
308 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
306 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
309 | Py_TPFLAGS_HAVE_ITER
307 | Py_TPFLAGS_HAVE_ITER
310 #endif
308 #endif
311
309
312 static PyTypeObject lazymanifestKeysIterator = {
310 static PyTypeObject lazymanifestKeysIterator = {
313 PyVarObject_HEAD_INIT(NULL, 0) /* header */
311 PyVarObject_HEAD_INIT(NULL, 0) /* header */
314 "parsers.lazymanifest.keysiterator", /*tp_name */
312 "parsers.lazymanifest.keysiterator", /*tp_name */
315 sizeof(lmIter), /*tp_basicsize */
313 sizeof(lmIter), /*tp_basicsize */
316 0, /*tp_itemsize */
314 0, /*tp_itemsize */
317 lmiter_dealloc, /*tp_dealloc */
315 lmiter_dealloc, /*tp_dealloc */
318 0, /*tp_print */
316 0, /*tp_print */
319 0, /*tp_getattr */
317 0, /*tp_getattr */
320 0, /*tp_setattr */
318 0, /*tp_setattr */
321 0, /*tp_compare */
319 0, /*tp_compare */
322 0, /*tp_repr */
320 0, /*tp_repr */
323 0, /*tp_as_number */
321 0, /*tp_as_number */
324 0, /*tp_as_sequence */
322 0, /*tp_as_sequence */
325 0, /*tp_as_mapping */
323 0, /*tp_as_mapping */
326 0, /*tp_hash */
324 0, /*tp_hash */
327 0, /*tp_call */
325 0, /*tp_call */
328 0, /*tp_str */
326 0, /*tp_str */
329 0, /*tp_getattro */
327 0, /*tp_getattro */
330 0, /*tp_setattro */
328 0, /*tp_setattro */
331 0, /*tp_as_buffer */
329 0, /*tp_as_buffer */
332 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
330 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
333 "Keys iterator for a lazymanifest.", /* tp_doc */
331 "Keys iterator for a lazymanifest.", /* tp_doc */
334 0, /* tp_traverse */
332 0, /* tp_traverse */
335 0, /* tp_clear */
333 0, /* tp_clear */
336 0, /* tp_richcompare */
334 0, /* tp_richcompare */
337 0, /* tp_weaklistoffset */
335 0, /* tp_weaklistoffset */
338 PyObject_SelfIter, /* tp_iter: __iter__() method */
336 PyObject_SelfIter, /* tp_iter: __iter__() method */
339 lmiter_iterkeysnext, /* tp_iternext: next() method */
337 lmiter_iterkeysnext, /* tp_iternext: next() method */
340 };
338 };
341
339
342 static lazymanifest *lazymanifest_copy(lazymanifest *self);
340 static lazymanifest *lazymanifest_copy(lazymanifest *self);
343
341
344 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
342 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
345 {
343 {
346 lmIter *i = NULL;
344 lmIter *i = NULL;
347 lazymanifest *t = lazymanifest_copy(self);
345 lazymanifest *t = lazymanifest_copy(self);
348 if (!t) {
346 if (!t) {
349 PyErr_NoMemory();
347 PyErr_NoMemory();
350 return NULL;
348 return NULL;
351 }
349 }
352 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
350 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
353 if (i) {
351 if (i) {
354 i->m = t;
352 i->m = t;
355 i->pos = -1;
353 i->pos = -1;
356 } else {
354 } else {
357 Py_DECREF(t);
355 Py_DECREF(t);
358 PyErr_NoMemory();
356 PyErr_NoMemory();
359 }
357 }
360 return (PyObject *)i;
358 return (PyObject *)i;
361 }
359 }
362
360
363 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
361 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
364 {
362 {
365 lmIter *i = NULL;
363 lmIter *i = NULL;
366 lazymanifest *t = lazymanifest_copy(self);
364 lazymanifest *t = lazymanifest_copy(self);
367 if (!t) {
365 if (!t) {
368 PyErr_NoMemory();
366 PyErr_NoMemory();
369 return NULL;
367 return NULL;
370 }
368 }
371 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
369 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
372 if (i) {
370 if (i) {
373 i->m = t;
371 i->m = t;
374 i->pos = -1;
372 i->pos = -1;
375 } else {
373 } else {
376 Py_DECREF(t);
374 Py_DECREF(t);
377 PyErr_NoMemory();
375 PyErr_NoMemory();
378 }
376 }
379 return (PyObject *)i;
377 return (PyObject *)i;
380 }
378 }
381
379
382 /* __getitem__ and __setitem__ support */
380 /* __getitem__ and __setitem__ support */
383
381
384 static Py_ssize_t lazymanifest_size(lazymanifest *self)
382 static Py_ssize_t lazymanifest_size(lazymanifest *self)
385 {
383 {
386 return self->livelines;
384 return self->livelines;
387 }
385 }
388
386
389 static int linecmp(const void *left, const void *right)
387 static int linecmp(const void *left, const void *right)
390 {
388 {
391 return strcmp(((const line *)left)->start,
389 return strcmp(((const line *)left)->start,
392 ((const line *)right)->start);
390 ((const line *)right)->start);
393 }
391 }
394
392
395 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
393 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
396 {
394 {
397 line needle;
395 line needle;
398 line *hit;
396 line *hit;
399 if (!PyBytes_Check(key)) {
397 if (!PyBytes_Check(key)) {
400 PyErr_Format(PyExc_TypeError,
398 PyErr_Format(PyExc_TypeError,
401 "getitem: manifest keys must be a string.");
399 "getitem: manifest keys must be a string.");
402 return NULL;
400 return NULL;
403 }
401 }
404 needle.start = PyBytes_AsString(key);
402 needle.start = PyBytes_AsString(key);
405 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
403 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
406 &linecmp);
404 &linecmp);
407 if (!hit || hit->deleted) {
405 if (!hit || hit->deleted) {
408 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
406 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
409 return NULL;
407 return NULL;
410 }
408 }
411 return hashflags(hit);
409 return hashflags(hit);
412 }
410 }
413
411
414 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
412 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
415 {
413 {
416 line needle;
414 line needle;
417 line *hit;
415 line *hit;
418 if (!PyBytes_Check(key)) {
416 if (!PyBytes_Check(key)) {
419 PyErr_Format(PyExc_TypeError,
417 PyErr_Format(PyExc_TypeError,
420 "delitem: manifest keys must be a string.");
418 "delitem: manifest keys must be a string.");
421 return -1;
419 return -1;
422 }
420 }
423 needle.start = PyBytes_AsString(key);
421 needle.start = PyBytes_AsString(key);
424 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
422 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
425 &linecmp);
423 &linecmp);
426 if (!hit || hit->deleted) {
424 if (!hit || hit->deleted) {
427 PyErr_Format(PyExc_KeyError,
425 PyErr_Format(PyExc_KeyError,
428 "Tried to delete nonexistent manifest entry.");
426 "Tried to delete nonexistent manifest entry.");
429 return -1;
427 return -1;
430 }
428 }
431 self->dirty = true;
429 self->dirty = true;
432 hit->deleted = true;
430 hit->deleted = true;
433 self->livelines--;
431 self->livelines--;
434 return 0;
432 return 0;
435 }
433 }
436
434
437 /* Do a binary search for the insertion point for new, creating the
435 /* Do a binary search for the insertion point for new, creating the
438 * new entry if needed. */
436 * new entry if needed. */
439 static int internalsetitem(lazymanifest *self, line *new)
437 static int internalsetitem(lazymanifest *self, line *new)
440 {
438 {
441 int start = 0, end = self->numlines;
439 int start = 0, end = self->numlines;
442 while (start < end) {
440 while (start < end) {
443 int pos = start + (end - start) / 2;
441 int pos = start + (end - start) / 2;
444 int c = linecmp(new, self->lines + pos);
442 int c = linecmp(new, self->lines + pos);
445 if (c < 0)
443 if (c < 0)
446 end = pos;
444 end = pos;
447 else if (c > 0)
445 else if (c > 0)
448 start = pos + 1;
446 start = pos + 1;
449 else {
447 else {
450 if (self->lines[pos].deleted)
448 if (self->lines[pos].deleted)
451 self->livelines++;
449 self->livelines++;
452 if (self->lines[pos].from_malloc)
450 if (self->lines[pos].from_malloc)
453 free(self->lines[pos].start);
451 free(self->lines[pos].start);
454 start = pos;
452 start = pos;
455 goto finish;
453 goto finish;
456 }
454 }
457 }
455 }
458 /* being here means we need to do an insert */
456 /* being here means we need to do an insert */
459 if (!realloc_if_full(self)) {
457 if (!realloc_if_full(self)) {
460 PyErr_NoMemory();
458 PyErr_NoMemory();
461 return -1;
459 return -1;
462 }
460 }
463 memmove(self->lines + start + 1, self->lines + start,
461 memmove(self->lines + start + 1, self->lines + start,
464 (self->numlines - start) * sizeof(line));
462 (self->numlines - start) * sizeof(line));
465 self->numlines++;
463 self->numlines++;
466 self->livelines++;
464 self->livelines++;
467 finish:
465 finish:
468 self->lines[start] = *new;
466 self->lines[start] = *new;
469 self->dirty = true;
467 self->dirty = true;
470 return 0;
468 return 0;
471 }
469 }
472
470
473 static int lazymanifest_setitem(
471 static int lazymanifest_setitem(
474 lazymanifest *self, PyObject *key, PyObject *value)
472 lazymanifest *self, PyObject *key, PyObject *value)
475 {
473 {
476 char *path;
474 char *path;
477 Py_ssize_t plen;
475 Py_ssize_t plen;
478 PyObject *pyhash;
476 PyObject *pyhash;
479 Py_ssize_t hlen;
477 Py_ssize_t hlen;
480 char *hash;
478 char *hash;
481 PyObject *pyflags;
479 PyObject *pyflags;
482 char *flags;
480 char *flags;
483 Py_ssize_t flen;
481 Py_ssize_t flen;
484 size_t dlen;
482 size_t dlen;
485 char *dest;
483 char *dest;
486 int i;
484 int i;
487 line new;
485 line new;
488 if (!PyBytes_Check(key)) {
486 if (!PyBytes_Check(key)) {
489 PyErr_Format(PyExc_TypeError,
487 PyErr_Format(PyExc_TypeError,
490 "setitem: manifest keys must be a string.");
488 "setitem: manifest keys must be a string.");
491 return -1;
489 return -1;
492 }
490 }
493 if (!value) {
491 if (!value) {
494 return lazymanifest_delitem(self, key);
492 return lazymanifest_delitem(self, key);
495 }
493 }
496 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
494 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
497 PyErr_Format(PyExc_TypeError,
495 PyErr_Format(PyExc_TypeError,
498 "Manifest values must be a tuple of (node, flags).");
496 "Manifest values must be a tuple of (node, flags).");
499 return -1;
497 return -1;
500 }
498 }
501 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
499 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
502 return -1;
500 return -1;
503 }
501 }
504
502
505 pyhash = PyTuple_GetItem(value, 0);
503 pyhash = PyTuple_GetItem(value, 0);
506 if (!PyBytes_Check(pyhash)) {
504 if (!PyBytes_Check(pyhash)) {
507 PyErr_Format(PyExc_TypeError,
505 PyErr_Format(PyExc_TypeError,
508 "node must be a 20-byte string");
506 "node must be a 20-byte string");
509 return -1;
507 return -1;
510 }
508 }
511 hlen = PyBytes_Size(pyhash);
509 hlen = PyBytes_Size(pyhash);
512 /* Some parts of the codebase try and set 21 or 22
510 /* Some parts of the codebase try and set 21 or 22
513 * byte "hash" values in order to perturb things for
511 * byte "hash" values in order to perturb things for
514 * status. We have to preserve at least the 21st
512 * status. We have to preserve at least the 21st
515 * byte. Sigh. If there's a 22nd byte, we drop it on
513 * byte. Sigh. If there's a 22nd byte, we drop it on
516 * the floor, which works fine.
514 * the floor, which works fine.
517 */
515 */
518 if (hlen != 20 && hlen != 21 && hlen != 22) {
516 if (hlen != 20 && hlen != 21 && hlen != 22) {
519 PyErr_Format(PyExc_TypeError,
517 PyErr_Format(PyExc_TypeError,
520 "node must be a 20-byte string");
518 "node must be a 20-byte string");
521 return -1;
519 return -1;
522 }
520 }
523 hash = PyBytes_AsString(pyhash);
521 hash = PyBytes_AsString(pyhash);
524
522
525 pyflags = PyTuple_GetItem(value, 1);
523 pyflags = PyTuple_GetItem(value, 1);
526 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
524 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
527 PyErr_Format(PyExc_TypeError,
525 PyErr_Format(PyExc_TypeError,
528 "flags must a 0 or 1 byte string");
526 "flags must a 0 or 1 byte string");
529 return -1;
527 return -1;
530 }
528 }
531 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
529 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
532 return -1;
530 return -1;
533 }
531 }
534 /* one null byte and one newline */
532 /* one null byte and one newline */
535 dlen = plen + 41 + flen + 1;
533 dlen = plen + 41 + flen + 1;
536 dest = malloc(dlen);
534 dest = malloc(dlen);
537 if (!dest) {
535 if (!dest) {
538 PyErr_NoMemory();
536 PyErr_NoMemory();
539 return -1;
537 return -1;
540 }
538 }
541 memcpy(dest, path, plen + 1);
539 memcpy(dest, path, plen + 1);
542 for (i = 0; i < 20; i++) {
540 for (i = 0; i < 20; i++) {
543 /* Cast to unsigned, so it will not get sign-extended when promoted
541 /* Cast to unsigned, so it will not get sign-extended when promoted
544 * to int (as is done when passing to a variadic function)
542 * to int (as is done when passing to a variadic function)
545 */
543 */
546 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
544 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
547 }
545 }
548 memcpy(dest + plen + 41, flags, flen);
546 memcpy(dest + plen + 41, flags, flen);
549 dest[plen + 41 + flen] = '\n';
547 dest[plen + 41 + flen] = '\n';
550 new.start = dest;
548 new.start = dest;
551 new.len = dlen;
549 new.len = dlen;
552 new.hash_suffix = '\0';
550 new.hash_suffix = '\0';
553 if (hlen > 20) {
551 if (hlen > 20) {
554 new.hash_suffix = hash[20];
552 new.hash_suffix = hash[20];
555 }
553 }
556 new.from_malloc = true; /* is `start` a pointer we allocated? */
554 new.from_malloc = true; /* is `start` a pointer we allocated? */
557 new.deleted = false; /* is this entry deleted? */
555 new.deleted = false; /* is this entry deleted? */
558 if (internalsetitem(self, &new)) {
556 if (internalsetitem(self, &new)) {
559 return -1;
557 return -1;
560 }
558 }
561 return 0;
559 return 0;
562 }
560 }
563
561
564 static PyMappingMethods lazymanifest_mapping_methods = {
562 static PyMappingMethods lazymanifest_mapping_methods = {
565 (lenfunc)lazymanifest_size, /* mp_length */
563 (lenfunc)lazymanifest_size, /* mp_length */
566 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
564 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
567 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
565 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
568 };
566 };
569
567
570 /* sequence methods (important or __contains__ builds an iterator) */
568 /* sequence methods (important or __contains__ builds an iterator) */
571
569
572 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
570 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
573 {
571 {
574 line needle;
572 line needle;
575 line *hit;
573 line *hit;
576 if (!PyBytes_Check(key)) {
574 if (!PyBytes_Check(key)) {
577 /* Our keys are always strings, so if the contains
575 /* Our keys are always strings, so if the contains
578 * check is for a non-string, just return false. */
576 * check is for a non-string, just return false. */
579 return 0;
577 return 0;
580 }
578 }
581 needle.start = PyBytes_AsString(key);
579 needle.start = PyBytes_AsString(key);
582 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
580 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
583 &linecmp);
581 &linecmp);
584 if (!hit || hit->deleted) {
582 if (!hit || hit->deleted) {
585 return 0;
583 return 0;
586 }
584 }
587 return 1;
585 return 1;
588 }
586 }
589
587
590 static PySequenceMethods lazymanifest_seq_meths = {
588 static PySequenceMethods lazymanifest_seq_meths = {
591 (lenfunc)lazymanifest_size, /* sq_length */
589 (lenfunc)lazymanifest_size, /* sq_length */
592 0, /* sq_concat */
590 0, /* sq_concat */
593 0, /* sq_repeat */
591 0, /* sq_repeat */
594 0, /* sq_item */
592 0, /* sq_item */
595 0, /* sq_slice */
593 0, /* sq_slice */
596 0, /* sq_ass_item */
594 0, /* sq_ass_item */
597 0, /* sq_ass_slice */
595 0, /* sq_ass_slice */
598 (objobjproc)lazymanifest_contains, /* sq_contains */
596 (objobjproc)lazymanifest_contains, /* sq_contains */
599 0, /* sq_inplace_concat */
597 0, /* sq_inplace_concat */
600 0, /* sq_inplace_repeat */
598 0, /* sq_inplace_repeat */
601 };
599 };
602
600
603
601
604 /* Other methods (copy, diff, etc) */
602 /* Other methods (copy, diff, etc) */
605 static PyTypeObject lazymanifestType;
603 static PyTypeObject lazymanifestType;
606
604
607 /* If the manifest has changes, build the new manifest text and reindex it. */
605 /* If the manifest has changes, build the new manifest text and reindex it. */
608 static int compact(lazymanifest *self)
606 static int compact(lazymanifest *self)
609 {
607 {
610 int i;
608 int i;
611 ssize_t need = 0;
609 ssize_t need = 0;
612 char *data;
610 char *data;
613 line *src, *dst;
611 line *src, *dst;
614 PyObject *pydata;
612 PyObject *pydata;
615 if (!self->dirty)
613 if (!self->dirty)
616 return 0;
614 return 0;
617 for (i = 0; i < self->numlines; i++) {
615 for (i = 0; i < self->numlines; i++) {
618 if (!self->lines[i].deleted) {
616 if (!self->lines[i].deleted) {
619 need += self->lines[i].len;
617 need += self->lines[i].len;
620 }
618 }
621 }
619 }
622 pydata = PyBytes_FromStringAndSize(NULL, need);
620 pydata = PyBytes_FromStringAndSize(NULL, need);
623 if (!pydata)
621 if (!pydata)
624 return -1;
622 return -1;
625 data = PyBytes_AsString(pydata);
623 data = PyBytes_AsString(pydata);
626 if (!data) {
624 if (!data) {
627 return -1;
625 return -1;
628 }
626 }
629 src = self->lines;
627 src = self->lines;
630 dst = self->lines;
628 dst = self->lines;
631 for (i = 0; i < self->numlines; i++, src++) {
629 for (i = 0; i < self->numlines; i++, src++) {
632 char *tofree = NULL;
630 char *tofree = NULL;
633 if (src->from_malloc) {
631 if (src->from_malloc) {
634 tofree = src->start;
632 tofree = src->start;
635 }
633 }
636 if (!src->deleted) {
634 if (!src->deleted) {
637 memcpy(data, src->start, src->len);
635 memcpy(data, src->start, src->len);
638 *dst = *src;
636 *dst = *src;
639 dst->start = data;
637 dst->start = data;
640 dst->from_malloc = false;
638 dst->from_malloc = false;
641 data += dst->len;
639 data += dst->len;
642 dst++;
640 dst++;
643 }
641 }
644 free(tofree);
642 free(tofree);
645 }
643 }
646 Py_DECREF(self->pydata);
644 Py_DECREF(self->pydata);
647 self->pydata = pydata;
645 self->pydata = pydata;
648 self->numlines = self->livelines;
646 self->numlines = self->livelines;
649 self->dirty = false;
647 self->dirty = false;
650 return 0;
648 return 0;
651 }
649 }
652
650
653 static PyObject *lazymanifest_text(lazymanifest *self)
651 static PyObject *lazymanifest_text(lazymanifest *self)
654 {
652 {
655 if (compact(self) != 0) {
653 if (compact(self) != 0) {
656 PyErr_NoMemory();
654 PyErr_NoMemory();
657 return NULL;
655 return NULL;
658 }
656 }
659 Py_INCREF(self->pydata);
657 Py_INCREF(self->pydata);
660 return self->pydata;
658 return self->pydata;
661 }
659 }
662
660
663 static lazymanifest *lazymanifest_copy(lazymanifest *self)
661 static lazymanifest *lazymanifest_copy(lazymanifest *self)
664 {
662 {
665 lazymanifest *copy = NULL;
663 lazymanifest *copy = NULL;
666 if (compact(self) != 0) {
664 if (compact(self) != 0) {
667 goto nomem;
665 goto nomem;
668 }
666 }
669 copy = PyObject_New(lazymanifest, &lazymanifestType);
667 copy = PyObject_New(lazymanifest, &lazymanifestType);
670 if (!copy) {
668 if (!copy) {
671 goto nomem;
669 goto nomem;
672 }
670 }
673 copy->numlines = self->numlines;
671 copy->numlines = self->numlines;
674 copy->livelines = self->livelines;
672 copy->livelines = self->livelines;
675 copy->dirty = false;
673 copy->dirty = false;
676 copy->lines = malloc(self->maxlines *sizeof(line));
674 copy->lines = malloc(self->maxlines *sizeof(line));
677 if (!copy->lines) {
675 if (!copy->lines) {
678 goto nomem;
676 goto nomem;
679 }
677 }
680 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
678 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
681 copy->maxlines = self->maxlines;
679 copy->maxlines = self->maxlines;
682 copy->pydata = self->pydata;
680 copy->pydata = self->pydata;
683 Py_INCREF(copy->pydata);
681 Py_INCREF(copy->pydata);
684 return copy;
682 return copy;
685 nomem:
683 nomem:
686 PyErr_NoMemory();
684 PyErr_NoMemory();
687 Py_XDECREF(copy);
685 Py_XDECREF(copy);
688 return NULL;
686 return NULL;
689 }
687 }
690
688
691 static lazymanifest *lazymanifest_filtercopy(
689 static lazymanifest *lazymanifest_filtercopy(
692 lazymanifest *self, PyObject *matchfn)
690 lazymanifest *self, PyObject *matchfn)
693 {
691 {
694 lazymanifest *copy = NULL;
692 lazymanifest *copy = NULL;
695 int i;
693 int i;
696 if (!PyCallable_Check(matchfn)) {
694 if (!PyCallable_Check(matchfn)) {
697 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
695 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
698 return NULL;
696 return NULL;
699 }
697 }
700 /* compact ourselves first to avoid double-frees later when we
698 /* compact ourselves first to avoid double-frees later when we
701 * compact tmp so that it doesn't have random pointers to our
699 * compact tmp so that it doesn't have random pointers to our
702 * underlying from_malloc-data (self->pydata is safe) */
700 * underlying from_malloc-data (self->pydata is safe) */
703 if (compact(self) != 0) {
701 if (compact(self) != 0) {
704 goto nomem;
702 goto nomem;
705 }
703 }
706 copy = PyObject_New(lazymanifest, &lazymanifestType);
704 copy = PyObject_New(lazymanifest, &lazymanifestType);
707 if (!copy) {
705 if (!copy) {
708 goto nomem;
706 goto nomem;
709 }
707 }
710 copy->dirty = true;
708 copy->dirty = true;
711 copy->lines = malloc(self->maxlines * sizeof(line));
709 copy->lines = malloc(self->maxlines * sizeof(line));
712 if (!copy->lines) {
710 if (!copy->lines) {
713 goto nomem;
711 goto nomem;
714 }
712 }
715 copy->maxlines = self->maxlines;
713 copy->maxlines = self->maxlines;
716 copy->numlines = 0;
714 copy->numlines = 0;
717 copy->pydata = self->pydata;
715 copy->pydata = self->pydata;
718 Py_INCREF(self->pydata);
716 Py_INCREF(self->pydata);
719 for (i = 0; i < self->numlines; i++) {
717 for (i = 0; i < self->numlines; i++) {
720 PyObject *arglist = NULL, *result = NULL;
718 PyObject *arglist = NULL, *result = NULL;
721 arglist = Py_BuildValue(PY23("(s)", "(y)"),
719 arglist = Py_BuildValue(PY23("(s)", "(y)"),
722 self->lines[i].start);
720 self->lines[i].start);
723 if (!arglist) {
721 if (!arglist) {
724 return NULL;
722 return NULL;
725 }
723 }
726 result = PyObject_CallObject(matchfn, arglist);
724 result = PyObject_CallObject(matchfn, arglist);
727 Py_DECREF(arglist);
725 Py_DECREF(arglist);
728 /* if the callback raised an exception, just let it
726 /* if the callback raised an exception, just let it
729 * through and give up */
727 * through and give up */
730 if (!result) {
728 if (!result) {
731 free(copy->lines);
729 free(copy->lines);
732 Py_DECREF(self->pydata);
730 Py_DECREF(self->pydata);
733 return NULL;
731 return NULL;
734 }
732 }
735 if (PyObject_IsTrue(result)) {
733 if (PyObject_IsTrue(result)) {
736 assert(!(self->lines[i].from_malloc));
734 assert(!(self->lines[i].from_malloc));
737 copy->lines[copy->numlines++] = self->lines[i];
735 copy->lines[copy->numlines++] = self->lines[i];
738 }
736 }
739 Py_DECREF(result);
737 Py_DECREF(result);
740 }
738 }
741 copy->livelines = copy->numlines;
739 copy->livelines = copy->numlines;
742 return copy;
740 return copy;
743 nomem:
741 nomem:
744 PyErr_NoMemory();
742 PyErr_NoMemory();
745 Py_XDECREF(copy);
743 Py_XDECREF(copy);
746 return NULL;
744 return NULL;
747 }
745 }
748
746
749 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
747 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
750 {
748 {
751 lazymanifest *other;
749 lazymanifest *other;
752 PyObject *pyclean = NULL;
750 PyObject *pyclean = NULL;
753 bool listclean;
751 bool listclean;
754 PyObject *emptyTup = NULL, *ret = NULL;
752 PyObject *emptyTup = NULL, *ret = NULL;
755 PyObject *es;
753 PyObject *es;
756 int sneedle = 0, oneedle = 0;
754 int sneedle = 0, oneedle = 0;
757 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
755 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
758 return NULL;
756 return NULL;
759 }
757 }
760 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
758 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
761 es = PyBytes_FromString("");
759 es = PyBytes_FromString("");
762 if (!es) {
760 if (!es) {
763 goto nomem;
761 goto nomem;
764 }
762 }
765 emptyTup = PyTuple_Pack(2, Py_None, es);
763 emptyTup = PyTuple_Pack(2, Py_None, es);
766 Py_DECREF(es);
764 Py_DECREF(es);
767 if (!emptyTup) {
765 if (!emptyTup) {
768 goto nomem;
766 goto nomem;
769 }
767 }
770 ret = PyDict_New();
768 ret = PyDict_New();
771 if (!ret) {
769 if (!ret) {
772 goto nomem;
770 goto nomem;
773 }
771 }
774 while (sneedle != self->numlines || oneedle != other->numlines) {
772 while (sneedle != self->numlines || oneedle != other->numlines) {
775 line *left = self->lines + sneedle;
773 line *left = self->lines + sneedle;
776 line *right = other->lines + oneedle;
774 line *right = other->lines + oneedle;
777 int result;
775 int result;
778 PyObject *key;
776 PyObject *key;
779 PyObject *outer;
777 PyObject *outer;
780 /* If we're looking at a deleted entry and it's not
778 /* If we're looking at a deleted entry and it's not
781 * the end of the manifest, just skip it. */
779 * the end of the manifest, just skip it. */
782 if (sneedle < self->numlines && left->deleted) {
780 if (sneedle < self->numlines && left->deleted) {
783 sneedle++;
781 sneedle++;
784 continue;
782 continue;
785 }
783 }
786 if (oneedle < other->numlines && right->deleted) {
784 if (oneedle < other->numlines && right->deleted) {
787 oneedle++;
785 oneedle++;
788 continue;
786 continue;
789 }
787 }
790 /* if we're at the end of either manifest, then we
788 /* if we're at the end of either manifest, then we
791 * know the remaining items are adds so we can skip
789 * know the remaining items are adds so we can skip
792 * the strcmp. */
790 * the strcmp. */
793 if (sneedle == self->numlines) {
791 if (sneedle == self->numlines) {
794 result = 1;
792 result = 1;
795 } else if (oneedle == other->numlines) {
793 } else if (oneedle == other->numlines) {
796 result = -1;
794 result = -1;
797 } else {
795 } else {
798 result = linecmp(left, right);
796 result = linecmp(left, right);
799 }
797 }
800 key = result <= 0 ?
798 key = result <= 0 ?
801 PyBytes_FromString(left->start) :
799 PyBytes_FromString(left->start) :
802 PyBytes_FromString(right->start);
800 PyBytes_FromString(right->start);
803 if (!key)
801 if (!key)
804 goto nomem;
802 goto nomem;
805 if (result < 0) {
803 if (result < 0) {
806 PyObject *l = hashflags(left);
804 PyObject *l = hashflags(left);
807 if (!l) {
805 if (!l) {
808 goto nomem;
806 goto nomem;
809 }
807 }
810 outer = PyTuple_Pack(2, l, emptyTup);
808 outer = PyTuple_Pack(2, l, emptyTup);
811 Py_DECREF(l);
809 Py_DECREF(l);
812 if (!outer) {
810 if (!outer) {
813 goto nomem;
811 goto nomem;
814 }
812 }
815 PyDict_SetItem(ret, key, outer);
813 PyDict_SetItem(ret, key, outer);
816 Py_DECREF(outer);
814 Py_DECREF(outer);
817 sneedle++;
815 sneedle++;
818 } else if (result > 0) {
816 } else if (result > 0) {
819 PyObject *r = hashflags(right);
817 PyObject *r = hashflags(right);
820 if (!r) {
818 if (!r) {
821 goto nomem;
819 goto nomem;
822 }
820 }
823 outer = PyTuple_Pack(2, emptyTup, r);
821 outer = PyTuple_Pack(2, emptyTup, r);
824 Py_DECREF(r);
822 Py_DECREF(r);
825 if (!outer) {
823 if (!outer) {
826 goto nomem;
824 goto nomem;
827 }
825 }
828 PyDict_SetItem(ret, key, outer);
826 PyDict_SetItem(ret, key, outer);
829 Py_DECREF(outer);
827 Py_DECREF(outer);
830 oneedle++;
828 oneedle++;
831 } else {
829 } else {
832 /* file exists in both manifests */
830 /* file exists in both manifests */
833 if (left->len != right->len
831 if (left->len != right->len
834 || memcmp(left->start, right->start, left->len)
832 || memcmp(left->start, right->start, left->len)
835 || left->hash_suffix != right->hash_suffix) {
833 || left->hash_suffix != right->hash_suffix) {
836 PyObject *l = hashflags(left);
834 PyObject *l = hashflags(left);
837 PyObject *r;
835 PyObject *r;
838 if (!l) {
836 if (!l) {
839 goto nomem;
837 goto nomem;
840 }
838 }
841 r = hashflags(right);
839 r = hashflags(right);
842 if (!r) {
840 if (!r) {
843 Py_DECREF(l);
841 Py_DECREF(l);
844 goto nomem;
842 goto nomem;
845 }
843 }
846 outer = PyTuple_Pack(2, l, r);
844 outer = PyTuple_Pack(2, l, r);
847 Py_DECREF(l);
845 Py_DECREF(l);
848 Py_DECREF(r);
846 Py_DECREF(r);
849 if (!outer) {
847 if (!outer) {
850 goto nomem;
848 goto nomem;
851 }
849 }
852 PyDict_SetItem(ret, key, outer);
850 PyDict_SetItem(ret, key, outer);
853 Py_DECREF(outer);
851 Py_DECREF(outer);
854 } else if (listclean) {
852 } else if (listclean) {
855 PyDict_SetItem(ret, key, Py_None);
853 PyDict_SetItem(ret, key, Py_None);
856 }
854 }
857 sneedle++;
855 sneedle++;
858 oneedle++;
856 oneedle++;
859 }
857 }
860 Py_DECREF(key);
858 Py_DECREF(key);
861 }
859 }
862 Py_DECREF(emptyTup);
860 Py_DECREF(emptyTup);
863 return ret;
861 return ret;
864 nomem:
862 nomem:
865 PyErr_NoMemory();
863 PyErr_NoMemory();
866 Py_XDECREF(ret);
864 Py_XDECREF(ret);
867 Py_XDECREF(emptyTup);
865 Py_XDECREF(emptyTup);
868 return NULL;
866 return NULL;
869 }
867 }
870
868
871 static PyMethodDef lazymanifest_methods[] = {
869 static PyMethodDef lazymanifest_methods[] = {
872 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
870 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
873 "Iterate over file names in this lazymanifest."},
871 "Iterate over file names in this lazymanifest."},
874 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
872 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
875 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
873 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
876 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
874 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
877 "Make a copy of this lazymanifest."},
875 "Make a copy of this lazymanifest."},
878 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
876 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
879 "Make a copy of this manifest filtered by matchfn."},
877 "Make a copy of this manifest filtered by matchfn."},
880 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
878 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
881 "Compare this lazymanifest to another one."},
879 "Compare this lazymanifest to another one."},
882 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
880 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
883 "Encode this manifest to text."},
881 "Encode this manifest to text."},
884 {NULL},
882 {NULL},
885 };
883 };
886
884
887 #ifdef IS_PY3K
885 #ifdef IS_PY3K
888 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
886 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
889 #else
887 #else
890 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
888 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
891 #endif
889 #endif
892
890
893 static PyTypeObject lazymanifestType = {
891 static PyTypeObject lazymanifestType = {
894 PyVarObject_HEAD_INIT(NULL, 0) /* header */
892 PyVarObject_HEAD_INIT(NULL, 0) /* header */
895 "parsers.lazymanifest", /* tp_name */
893 "parsers.lazymanifest", /* tp_name */
896 sizeof(lazymanifest), /* tp_basicsize */
894 sizeof(lazymanifest), /* tp_basicsize */
897 0, /* tp_itemsize */
895 0, /* tp_itemsize */
898 (destructor)lazymanifest_dealloc, /* tp_dealloc */
896 (destructor)lazymanifest_dealloc, /* tp_dealloc */
899 0, /* tp_print */
897 0, /* tp_print */
900 0, /* tp_getattr */
898 0, /* tp_getattr */
901 0, /* tp_setattr */
899 0, /* tp_setattr */
902 0, /* tp_compare */
900 0, /* tp_compare */
903 0, /* tp_repr */
901 0, /* tp_repr */
904 0, /* tp_as_number */
902 0, /* tp_as_number */
905 &lazymanifest_seq_meths, /* tp_as_sequence */
903 &lazymanifest_seq_meths, /* tp_as_sequence */
906 &lazymanifest_mapping_methods, /* tp_as_mapping */
904 &lazymanifest_mapping_methods, /* tp_as_mapping */
907 0, /* tp_hash */
905 0, /* tp_hash */
908 0, /* tp_call */
906 0, /* tp_call */
909 0, /* tp_str */
907 0, /* tp_str */
910 0, /* tp_getattro */
908 0, /* tp_getattro */
911 0, /* tp_setattro */
909 0, /* tp_setattro */
912 0, /* tp_as_buffer */
910 0, /* tp_as_buffer */
913 LAZYMANIFEST_TPFLAGS, /* tp_flags */
911 LAZYMANIFEST_TPFLAGS, /* tp_flags */
914 "TODO(augie)", /* tp_doc */
912 "TODO(augie)", /* tp_doc */
915 0, /* tp_traverse */
913 0, /* tp_traverse */
916 0, /* tp_clear */
914 0, /* tp_clear */
917 0, /* tp_richcompare */
915 0, /* tp_richcompare */
918 0, /* tp_weaklistoffset */
916 0, /* tp_weaklistoffset */
919 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
917 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
920 0, /* tp_iternext */
918 0, /* tp_iternext */
921 lazymanifest_methods, /* tp_methods */
919 lazymanifest_methods, /* tp_methods */
922 0, /* tp_members */
920 0, /* tp_members */
923 0, /* tp_getset */
921 0, /* tp_getset */
924 0, /* tp_base */
922 0, /* tp_base */
925 0, /* tp_dict */
923 0, /* tp_dict */
926 0, /* tp_descr_get */
924 0, /* tp_descr_get */
927 0, /* tp_descr_set */
925 0, /* tp_descr_set */
928 0, /* tp_dictoffset */
926 0, /* tp_dictoffset */
929 (initproc)lazymanifest_init, /* tp_init */
927 (initproc)lazymanifest_init, /* tp_init */
930 0, /* tp_alloc */
928 0, /* tp_alloc */
931 };
929 };
932
930
933 void manifest_module_init(PyObject * mod)
931 void manifest_module_init(PyObject * mod)
934 {
932 {
935 lazymanifestType.tp_new = PyType_GenericNew;
933 lazymanifestType.tp_new = PyType_GenericNew;
936 if (PyType_Ready(&lazymanifestType) < 0)
934 if (PyType_Ready(&lazymanifestType) < 0)
937 return;
935 return;
938 Py_INCREF(&lazymanifestType);
936 Py_INCREF(&lazymanifestType);
939
937
940 PyModule_AddObject(mod, "lazymanifest",
938 PyModule_AddObject(mod, "lazymanifest",
941 (PyObject *)&lazymanifestType);
939 (PyObject *)&lazymanifestType);
942 }
940 }
@@ -1,2089 +1,2087 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <assert.h>
11 #include <assert.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <stddef.h>
13 #include <stddef.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #include "bitmanipulation.h"
16 #include "bitmanipulation.h"
17 #include "charencode.h"
17 #include "charencode.h"
18 #include "util.h"
18 #include "util.h"
19
19
20 #ifdef IS_PY3K
20 #ifdef IS_PY3K
21 /* The mapping of Python types is meant to be temporary to get Python
21 /* The mapping of Python types is meant to be temporary to get Python
22 * 3 to compile. We should remove this once Python 3 support is fully
22 * 3 to compile. We should remove this once Python 3 support is fully
23 * supported and proper types are used in the extensions themselves. */
23 * supported and proper types are used in the extensions themselves. */
24 #define PyInt_Check PyLong_Check
24 #define PyInt_Check PyLong_Check
25 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromLong PyLong_FromLong
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 #define PyInt_AS_LONG PyLong_AS_LONG
27 #define PyInt_AS_LONG PyLong_AS_LONG
28 #define PyInt_AsLong PyLong_AsLong
28 #define PyInt_AsLong PyLong_AsLong
29 #endif
29 #endif
30
30
31 /*
31 /*
32 * A base-16 trie for fast node->rev mapping.
32 * A base-16 trie for fast node->rev mapping.
33 *
33 *
34 * Positive value is index of the next node in the trie
34 * Positive value is index of the next node in the trie
35 * Negative value is a leaf: -(rev + 1)
35 * Negative value is a leaf: -(rev + 1)
36 * Zero is empty
36 * Zero is empty
37 */
37 */
38 typedef struct {
38 typedef struct {
39 int children[16];
39 int children[16];
40 } nodetree;
40 } nodetree;
41
41
42 /*
42 /*
43 * This class has two behaviors.
43 * This class has two behaviors.
44 *
44 *
45 * When used in a list-like way (with integer keys), we decode an
45 * When used in a list-like way (with integer keys), we decode an
46 * entry in a RevlogNG index file on demand. Our last entry is a
46 * entry in a RevlogNG index file on demand. Our last entry is a
47 * sentinel, always a nullid. We have limited support for
47 * sentinel, always a nullid. We have limited support for
48 * integer-keyed insert and delete, only at elements right before the
48 * integer-keyed insert and delete, only at elements right before the
49 * sentinel.
49 * sentinel.
50 *
50 *
51 * With string keys, we lazily perform a reverse mapping from node to
51 * With string keys, we lazily perform a reverse mapping from node to
52 * rev, using a base-16 trie.
52 * rev, using a base-16 trie.
53 */
53 */
54 typedef struct {
54 typedef struct {
55 PyObject_HEAD
55 PyObject_HEAD
56 /* Type-specific fields go here. */
56 /* Type-specific fields go here. */
57 PyObject *data; /* raw bytes of index */
57 PyObject *data; /* raw bytes of index */
58 Py_buffer buf; /* buffer of data */
58 Py_buffer buf; /* buffer of data */
59 PyObject **cache; /* cached tuples */
59 PyObject **cache; /* cached tuples */
60 const char **offsets; /* populated on demand */
60 const char **offsets; /* populated on demand */
61 Py_ssize_t raw_length; /* original number of elements */
61 Py_ssize_t raw_length; /* original number of elements */
62 Py_ssize_t length; /* current number of elements */
62 Py_ssize_t length; /* current number of elements */
63 PyObject *added; /* populated on demand */
63 PyObject *added; /* populated on demand */
64 PyObject *headrevs; /* cache, invalidated on changes */
64 PyObject *headrevs; /* cache, invalidated on changes */
65 PyObject *filteredrevs;/* filtered revs set */
65 PyObject *filteredrevs;/* filtered revs set */
66 nodetree *nt; /* base-16 trie */
66 nodetree *nt; /* base-16 trie */
67 unsigned ntlength; /* # nodes in use */
67 unsigned ntlength; /* # nodes in use */
68 unsigned ntcapacity; /* # nodes allocated */
68 unsigned ntcapacity; /* # nodes allocated */
69 int ntdepth; /* maximum depth of tree */
69 int ntdepth; /* maximum depth of tree */
70 int ntsplits; /* # splits performed */
70 int ntsplits; /* # splits performed */
71 int ntrev; /* last rev scanned */
71 int ntrev; /* last rev scanned */
72 int ntlookups; /* # lookups */
72 int ntlookups; /* # lookups */
73 int ntmisses; /* # lookups that miss the cache */
73 int ntmisses; /* # lookups that miss the cache */
74 int inlined;
74 int inlined;
75 } indexObject;
75 } indexObject;
76
76
77 static Py_ssize_t index_length(const indexObject *self)
77 static Py_ssize_t index_length(const indexObject *self)
78 {
78 {
79 if (self->added == NULL)
79 if (self->added == NULL)
80 return self->length;
80 return self->length;
81 return self->length + PyList_GET_SIZE(self->added);
81 return self->length + PyList_GET_SIZE(self->added);
82 }
82 }
83
83
84 static PyObject *nullentry;
84 static PyObject *nullentry;
85 static const char nullid[20];
85 static const char nullid[20];
86
86
87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
88
88
89 #if LONG_MAX == 0x7fffffffL
89 #if LONG_MAX == 0x7fffffffL
90 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
90 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
91 #else
91 #else
92 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
92 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
93 #endif
93 #endif
94
94
95 /* A RevlogNG v1 index entry is 64 bytes long. */
95 /* A RevlogNG v1 index entry is 64 bytes long. */
96 static const long v1_hdrsize = 64;
96 static const long v1_hdrsize = 64;
97
97
98 /*
98 /*
99 * Return a pointer to the beginning of a RevlogNG record.
99 * Return a pointer to the beginning of a RevlogNG record.
100 */
100 */
101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
102 {
102 {
103 if (self->inlined && pos > 0) {
103 if (self->inlined && pos > 0) {
104 if (self->offsets == NULL) {
104 if (self->offsets == NULL) {
105 self->offsets = PyMem_Malloc(self->raw_length *
105 self->offsets = PyMem_Malloc(self->raw_length *
106 sizeof(*self->offsets));
106 sizeof(*self->offsets));
107 if (self->offsets == NULL)
107 if (self->offsets == NULL)
108 return (const char *)PyErr_NoMemory();
108 return (const char *)PyErr_NoMemory();
109 inline_scan(self, self->offsets);
109 inline_scan(self, self->offsets);
110 }
110 }
111 return self->offsets[pos];
111 return self->offsets[pos];
112 }
112 }
113
113
114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
115 }
115 }
116
116
117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
118 int *ps, int maxrev)
118 int *ps, int maxrev)
119 {
119 {
120 if (rev >= self->length - 1) {
120 if (rev >= self->length - 1) {
121 PyObject *tuple = PyList_GET_ITEM(self->added,
121 PyObject *tuple = PyList_GET_ITEM(self->added,
122 rev - self->length + 1);
122 rev - self->length + 1);
123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
125 } else {
125 } else {
126 const char *data = index_deref(self, rev);
126 const char *data = index_deref(self, rev);
127 ps[0] = getbe32(data + 24);
127 ps[0] = getbe32(data + 24);
128 ps[1] = getbe32(data + 28);
128 ps[1] = getbe32(data + 28);
129 }
129 }
130 /* If index file is corrupted, ps[] may point to invalid revisions. So
130 /* If index file is corrupted, ps[] may point to invalid revisions. So
131 * there is a risk of buffer overflow to trust them unconditionally. */
131 * there is a risk of buffer overflow to trust them unconditionally. */
132 if (ps[0] > maxrev || ps[1] > maxrev) {
132 if (ps[0] > maxrev || ps[1] > maxrev) {
133 PyErr_SetString(PyExc_ValueError, "parent out of range");
133 PyErr_SetString(PyExc_ValueError, "parent out of range");
134 return -1;
134 return -1;
135 }
135 }
136 return 0;
136 return 0;
137 }
137 }
138
138
139
139
140 /*
140 /*
141 * RevlogNG format (all in big endian, data may be inlined):
141 * RevlogNG format (all in big endian, data may be inlined):
142 * 6 bytes: offset
142 * 6 bytes: offset
143 * 2 bytes: flags
143 * 2 bytes: flags
144 * 4 bytes: compressed length
144 * 4 bytes: compressed length
145 * 4 bytes: uncompressed length
145 * 4 bytes: uncompressed length
146 * 4 bytes: base revision
146 * 4 bytes: base revision
147 * 4 bytes: link revision
147 * 4 bytes: link revision
148 * 4 bytes: parent 1 revision
148 * 4 bytes: parent 1 revision
149 * 4 bytes: parent 2 revision
149 * 4 bytes: parent 2 revision
150 * 32 bytes: nodeid (only 20 bytes used)
150 * 32 bytes: nodeid (only 20 bytes used)
151 */
151 */
152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
153 {
153 {
154 uint64_t offset_flags;
154 uint64_t offset_flags;
155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
156 const char *c_node_id;
156 const char *c_node_id;
157 const char *data;
157 const char *data;
158 Py_ssize_t length = index_length(self);
158 Py_ssize_t length = index_length(self);
159 PyObject *entry;
159 PyObject *entry;
160
160
161 if (pos < 0)
161 if (pos < 0)
162 pos += length;
162 pos += length;
163
163
164 if (pos < 0 || pos >= length) {
164 if (pos < 0 || pos >= length) {
165 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
165 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
166 return NULL;
166 return NULL;
167 }
167 }
168
168
169 if (pos == length - 1) {
169 if (pos == length - 1) {
170 Py_INCREF(nullentry);
170 Py_INCREF(nullentry);
171 return nullentry;
171 return nullentry;
172 }
172 }
173
173
174 if (pos >= self->length - 1) {
174 if (pos >= self->length - 1) {
175 PyObject *obj;
175 PyObject *obj;
176 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
176 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
177 Py_INCREF(obj);
177 Py_INCREF(obj);
178 return obj;
178 return obj;
179 }
179 }
180
180
181 if (self->cache) {
181 if (self->cache) {
182 if (self->cache[pos]) {
182 if (self->cache[pos]) {
183 Py_INCREF(self->cache[pos]);
183 Py_INCREF(self->cache[pos]);
184 return self->cache[pos];
184 return self->cache[pos];
185 }
185 }
186 } else {
186 } else {
187 self->cache = calloc(self->raw_length, sizeof(PyObject *));
187 self->cache = calloc(self->raw_length, sizeof(PyObject *));
188 if (self->cache == NULL)
188 if (self->cache == NULL)
189 return PyErr_NoMemory();
189 return PyErr_NoMemory();
190 }
190 }
191
191
192 data = index_deref(self, pos);
192 data = index_deref(self, pos);
193 if (data == NULL)
193 if (data == NULL)
194 return NULL;
194 return NULL;
195
195
196 offset_flags = getbe32(data + 4);
196 offset_flags = getbe32(data + 4);
197 if (pos == 0) /* mask out version number for the first entry */
197 if (pos == 0) /* mask out version number for the first entry */
198 offset_flags &= 0xFFFF;
198 offset_flags &= 0xFFFF;
199 else {
199 else {
200 uint32_t offset_high = getbe32(data);
200 uint32_t offset_high = getbe32(data);
201 offset_flags |= ((uint64_t)offset_high) << 32;
201 offset_flags |= ((uint64_t)offset_high) << 32;
202 }
202 }
203
203
204 comp_len = getbe32(data + 8);
204 comp_len = getbe32(data + 8);
205 uncomp_len = getbe32(data + 12);
205 uncomp_len = getbe32(data + 12);
206 base_rev = getbe32(data + 16);
206 base_rev = getbe32(data + 16);
207 link_rev = getbe32(data + 20);
207 link_rev = getbe32(data + 20);
208 parent_1 = getbe32(data + 24);
208 parent_1 = getbe32(data + 24);
209 parent_2 = getbe32(data + 28);
209 parent_2 = getbe32(data + 28);
210 c_node_id = data + 32;
210 c_node_id = data + 32;
211
211
212 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
212 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
213 uncomp_len, base_rev, link_rev,
213 uncomp_len, base_rev, link_rev,
214 parent_1, parent_2, c_node_id, 20);
214 parent_1, parent_2, c_node_id, 20);
215
215
216 if (entry) {
216 if (entry) {
217 PyObject_GC_UnTrack(entry);
217 PyObject_GC_UnTrack(entry);
218 Py_INCREF(entry);
218 Py_INCREF(entry);
219 }
219 }
220
220
221 self->cache[pos] = entry;
221 self->cache[pos] = entry;
222
222
223 return entry;
223 return entry;
224 }
224 }
225
225
226 /*
226 /*
227 * Return the 20-byte SHA of the node corresponding to the given rev.
227 * Return the 20-byte SHA of the node corresponding to the given rev.
228 */
228 */
229 static const char *index_node(indexObject *self, Py_ssize_t pos)
229 static const char *index_node(indexObject *self, Py_ssize_t pos)
230 {
230 {
231 Py_ssize_t length = index_length(self);
231 Py_ssize_t length = index_length(self);
232 const char *data;
232 const char *data;
233
233
234 if (pos == length - 1 || pos == INT_MAX)
234 if (pos == length - 1 || pos == INT_MAX)
235 return nullid;
235 return nullid;
236
236
237 if (pos >= length)
237 if (pos >= length)
238 return NULL;
238 return NULL;
239
239
240 if (pos >= self->length - 1) {
240 if (pos >= self->length - 1) {
241 PyObject *tuple, *str;
241 PyObject *tuple, *str;
242 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
242 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
243 str = PyTuple_GetItem(tuple, 7);
243 str = PyTuple_GetItem(tuple, 7);
244 return str ? PyBytes_AS_STRING(str) : NULL;
244 return str ? PyBytes_AS_STRING(str) : NULL;
245 }
245 }
246
246
247 data = index_deref(self, pos);
247 data = index_deref(self, pos);
248 return data ? data + 32 : NULL;
248 return data ? data + 32 : NULL;
249 }
249 }
250
250
251 static int nt_insert(indexObject *self, const char *node, int rev);
251 static int nt_insert(indexObject *self, const char *node, int rev);
252
252
253 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
253 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
254 {
254 {
255 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
255 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
256 return -1;
256 return -1;
257 if (*nodelen == 20)
257 if (*nodelen == 20)
258 return 0;
258 return 0;
259 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
259 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
260 return -1;
260 return -1;
261 }
261 }
262
262
263 static PyObject *index_insert(indexObject *self, PyObject *args)
263 static PyObject *index_insert(indexObject *self, PyObject *args)
264 {
264 {
265 PyObject *obj;
265 PyObject *obj;
266 char *node;
266 char *node;
267 int index;
267 int index;
268 Py_ssize_t len, nodelen;
268 Py_ssize_t len, nodelen;
269
269
270 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
270 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
271 return NULL;
271 return NULL;
272
272
273 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
273 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
274 PyErr_SetString(PyExc_TypeError, "8-tuple required");
274 PyErr_SetString(PyExc_TypeError, "8-tuple required");
275 return NULL;
275 return NULL;
276 }
276 }
277
277
278 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
278 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
279 return NULL;
279 return NULL;
280
280
281 len = index_length(self);
281 len = index_length(self);
282
282
283 if (index < 0)
283 if (index < 0)
284 index += len;
284 index += len;
285
285
286 if (index != len - 1) {
286 if (index != len - 1) {
287 PyErr_SetString(PyExc_IndexError,
287 PyErr_SetString(PyExc_IndexError,
288 "insert only supported at index -1");
288 "insert only supported at index -1");
289 return NULL;
289 return NULL;
290 }
290 }
291
291
292 if (self->added == NULL) {
292 if (self->added == NULL) {
293 self->added = PyList_New(0);
293 self->added = PyList_New(0);
294 if (self->added == NULL)
294 if (self->added == NULL)
295 return NULL;
295 return NULL;
296 }
296 }
297
297
298 if (PyList_Append(self->added, obj) == -1)
298 if (PyList_Append(self->added, obj) == -1)
299 return NULL;
299 return NULL;
300
300
301 if (self->nt)
301 if (self->nt)
302 nt_insert(self, node, index);
302 nt_insert(self, node, index);
303
303
304 Py_CLEAR(self->headrevs);
304 Py_CLEAR(self->headrevs);
305 Py_RETURN_NONE;
305 Py_RETURN_NONE;
306 }
306 }
307
307
308 static void _index_clearcaches(indexObject *self)
308 static void _index_clearcaches(indexObject *self)
309 {
309 {
310 if (self->cache) {
310 if (self->cache) {
311 Py_ssize_t i;
311 Py_ssize_t i;
312
312
313 for (i = 0; i < self->raw_length; i++)
313 for (i = 0; i < self->raw_length; i++)
314 Py_CLEAR(self->cache[i]);
314 Py_CLEAR(self->cache[i]);
315 free(self->cache);
315 free(self->cache);
316 self->cache = NULL;
316 self->cache = NULL;
317 }
317 }
318 if (self->offsets) {
318 if (self->offsets) {
319 PyMem_Free(self->offsets);
319 PyMem_Free(self->offsets);
320 self->offsets = NULL;
320 self->offsets = NULL;
321 }
321 }
322 if (self->nt) {
322 free(self->nt);
323 free(self->nt);
323 self->nt = NULL;
324 self->nt = NULL;
325 }
326 Py_CLEAR(self->headrevs);
324 Py_CLEAR(self->headrevs);
327 }
325 }
328
326
329 static PyObject *index_clearcaches(indexObject *self)
327 static PyObject *index_clearcaches(indexObject *self)
330 {
328 {
331 _index_clearcaches(self);
329 _index_clearcaches(self);
332 self->ntlength = self->ntcapacity = 0;
330 self->ntlength = self->ntcapacity = 0;
333 self->ntdepth = self->ntsplits = 0;
331 self->ntdepth = self->ntsplits = 0;
334 self->ntrev = -1;
332 self->ntrev = -1;
335 self->ntlookups = self->ntmisses = 0;
333 self->ntlookups = self->ntmisses = 0;
336 Py_RETURN_NONE;
334 Py_RETURN_NONE;
337 }
335 }
338
336
339 static PyObject *index_stats(indexObject *self)
337 static PyObject *index_stats(indexObject *self)
340 {
338 {
341 PyObject *obj = PyDict_New();
339 PyObject *obj = PyDict_New();
342 PyObject *t = NULL;
340 PyObject *t = NULL;
343
341
344 if (obj == NULL)
342 if (obj == NULL)
345 return NULL;
343 return NULL;
346
344
347 #define istat(__n, __d) \
345 #define istat(__n, __d) \
348 do { \
346 do { \
349 t = PyInt_FromSsize_t(self->__n); \
347 t = PyInt_FromSsize_t(self->__n); \
350 if (!t) \
348 if (!t) \
351 goto bail; \
349 goto bail; \
352 if (PyDict_SetItemString(obj, __d, t) == -1) \
350 if (PyDict_SetItemString(obj, __d, t) == -1) \
353 goto bail; \
351 goto bail; \
354 Py_DECREF(t); \
352 Py_DECREF(t); \
355 } while (0)
353 } while (0)
356
354
357 if (self->added) {
355 if (self->added) {
358 Py_ssize_t len = PyList_GET_SIZE(self->added);
356 Py_ssize_t len = PyList_GET_SIZE(self->added);
359 t = PyInt_FromSsize_t(len);
357 t = PyInt_FromSsize_t(len);
360 if (!t)
358 if (!t)
361 goto bail;
359 goto bail;
362 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
360 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
363 goto bail;
361 goto bail;
364 Py_DECREF(t);
362 Py_DECREF(t);
365 }
363 }
366
364
367 if (self->raw_length != self->length - 1)
365 if (self->raw_length != self->length - 1)
368 istat(raw_length, "revs on disk");
366 istat(raw_length, "revs on disk");
369 istat(length, "revs in memory");
367 istat(length, "revs in memory");
370 istat(ntcapacity, "node trie capacity");
368 istat(ntcapacity, "node trie capacity");
371 istat(ntdepth, "node trie depth");
369 istat(ntdepth, "node trie depth");
372 istat(ntlength, "node trie count");
370 istat(ntlength, "node trie count");
373 istat(ntlookups, "node trie lookups");
371 istat(ntlookups, "node trie lookups");
374 istat(ntmisses, "node trie misses");
372 istat(ntmisses, "node trie misses");
375 istat(ntrev, "node trie last rev scanned");
373 istat(ntrev, "node trie last rev scanned");
376 istat(ntsplits, "node trie splits");
374 istat(ntsplits, "node trie splits");
377
375
378 #undef istat
376 #undef istat
379
377
380 return obj;
378 return obj;
381
379
382 bail:
380 bail:
383 Py_XDECREF(obj);
381 Py_XDECREF(obj);
384 Py_XDECREF(t);
382 Py_XDECREF(t);
385 return NULL;
383 return NULL;
386 }
384 }
387
385
388 /*
386 /*
389 * When we cache a list, we want to be sure the caller can't mutate
387 * When we cache a list, we want to be sure the caller can't mutate
390 * the cached copy.
388 * the cached copy.
391 */
389 */
392 static PyObject *list_copy(PyObject *list)
390 static PyObject *list_copy(PyObject *list)
393 {
391 {
394 Py_ssize_t len = PyList_GET_SIZE(list);
392 Py_ssize_t len = PyList_GET_SIZE(list);
395 PyObject *newlist = PyList_New(len);
393 PyObject *newlist = PyList_New(len);
396 Py_ssize_t i;
394 Py_ssize_t i;
397
395
398 if (newlist == NULL)
396 if (newlist == NULL)
399 return NULL;
397 return NULL;
400
398
401 for (i = 0; i < len; i++) {
399 for (i = 0; i < len; i++) {
402 PyObject *obj = PyList_GET_ITEM(list, i);
400 PyObject *obj = PyList_GET_ITEM(list, i);
403 Py_INCREF(obj);
401 Py_INCREF(obj);
404 PyList_SET_ITEM(newlist, i, obj);
402 PyList_SET_ITEM(newlist, i, obj);
405 }
403 }
406
404
407 return newlist;
405 return newlist;
408 }
406 }
409
407
410 static int check_filter(PyObject *filter, Py_ssize_t arg)
408 static int check_filter(PyObject *filter, Py_ssize_t arg)
411 {
409 {
412 if (filter) {
410 if (filter) {
413 PyObject *arglist, *result;
411 PyObject *arglist, *result;
414 int isfiltered;
412 int isfiltered;
415
413
416 arglist = Py_BuildValue("(n)", arg);
414 arglist = Py_BuildValue("(n)", arg);
417 if (!arglist) {
415 if (!arglist) {
418 return -1;
416 return -1;
419 }
417 }
420
418
421 result = PyEval_CallObject(filter, arglist);
419 result = PyEval_CallObject(filter, arglist);
422 Py_DECREF(arglist);
420 Py_DECREF(arglist);
423 if (!result) {
421 if (!result) {
424 return -1;
422 return -1;
425 }
423 }
426
424
427 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
425 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
428 * same as this function, so we can just return it directly.*/
426 * same as this function, so we can just return it directly.*/
429 isfiltered = PyObject_IsTrue(result);
427 isfiltered = PyObject_IsTrue(result);
430 Py_DECREF(result);
428 Py_DECREF(result);
431 return isfiltered;
429 return isfiltered;
432 } else {
430 } else {
433 return 0;
431 return 0;
434 }
432 }
435 }
433 }
436
434
437 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
435 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
438 Py_ssize_t marker, char *phases)
436 Py_ssize_t marker, char *phases)
439 {
437 {
440 PyObject *iter = NULL;
438 PyObject *iter = NULL;
441 PyObject *iter_item = NULL;
439 PyObject *iter_item = NULL;
442 Py_ssize_t min_idx = index_length(self) + 1;
440 Py_ssize_t min_idx = index_length(self) + 1;
443 long iter_item_long;
441 long iter_item_long;
444
442
445 if (PyList_GET_SIZE(list) != 0) {
443 if (PyList_GET_SIZE(list) != 0) {
446 iter = PyObject_GetIter(list);
444 iter = PyObject_GetIter(list);
447 if (iter == NULL)
445 if (iter == NULL)
448 return -2;
446 return -2;
449 while ((iter_item = PyIter_Next(iter))) {
447 while ((iter_item = PyIter_Next(iter))) {
450 iter_item_long = PyInt_AS_LONG(iter_item);
448 iter_item_long = PyInt_AS_LONG(iter_item);
451 Py_DECREF(iter_item);
449 Py_DECREF(iter_item);
452 if (iter_item_long < min_idx)
450 if (iter_item_long < min_idx)
453 min_idx = iter_item_long;
451 min_idx = iter_item_long;
454 phases[iter_item_long] = marker;
452 phases[iter_item_long] = marker;
455 }
453 }
456 Py_DECREF(iter);
454 Py_DECREF(iter);
457 }
455 }
458
456
459 return min_idx;
457 return min_idx;
460 }
458 }
461
459
462 static inline void set_phase_from_parents(char *phases, int parent_1,
460 static inline void set_phase_from_parents(char *phases, int parent_1,
463 int parent_2, Py_ssize_t i)
461 int parent_2, Py_ssize_t i)
464 {
462 {
465 if (parent_1 >= 0 && phases[parent_1] > phases[i])
463 if (parent_1 >= 0 && phases[parent_1] > phases[i])
466 phases[i] = phases[parent_1];
464 phases[i] = phases[parent_1];
467 if (parent_2 >= 0 && phases[parent_2] > phases[i])
465 if (parent_2 >= 0 && phases[parent_2] > phases[i])
468 phases[i] = phases[parent_2];
466 phases[i] = phases[parent_2];
469 }
467 }
470
468
471 static PyObject *reachableroots2(indexObject *self, PyObject *args)
469 static PyObject *reachableroots2(indexObject *self, PyObject *args)
472 {
470 {
473
471
474 /* Input */
472 /* Input */
475 long minroot;
473 long minroot;
476 PyObject *includepatharg = NULL;
474 PyObject *includepatharg = NULL;
477 int includepath = 0;
475 int includepath = 0;
478 /* heads and roots are lists */
476 /* heads and roots are lists */
479 PyObject *heads = NULL;
477 PyObject *heads = NULL;
480 PyObject *roots = NULL;
478 PyObject *roots = NULL;
481 PyObject *reachable = NULL;
479 PyObject *reachable = NULL;
482
480
483 PyObject *val;
481 PyObject *val;
484 Py_ssize_t len = index_length(self) - 1;
482 Py_ssize_t len = index_length(self) - 1;
485 long revnum;
483 long revnum;
486 Py_ssize_t k;
484 Py_ssize_t k;
487 Py_ssize_t i;
485 Py_ssize_t i;
488 Py_ssize_t l;
486 Py_ssize_t l;
489 int r;
487 int r;
490 int parents[2];
488 int parents[2];
491
489
492 /* Internal data structure:
490 /* Internal data structure:
493 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
491 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
494 * revstates: array of length len+1 (all revs + nullrev) */
492 * revstates: array of length len+1 (all revs + nullrev) */
495 int *tovisit = NULL;
493 int *tovisit = NULL;
496 long lentovisit = 0;
494 long lentovisit = 0;
497 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
495 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
498 char *revstates = NULL;
496 char *revstates = NULL;
499
497
500 /* Get arguments */
498 /* Get arguments */
501 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
499 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
502 &PyList_Type, &roots,
500 &PyList_Type, &roots,
503 &PyBool_Type, &includepatharg))
501 &PyBool_Type, &includepatharg))
504 goto bail;
502 goto bail;
505
503
506 if (includepatharg == Py_True)
504 if (includepatharg == Py_True)
507 includepath = 1;
505 includepath = 1;
508
506
509 /* Initialize return set */
507 /* Initialize return set */
510 reachable = PyList_New(0);
508 reachable = PyList_New(0);
511 if (reachable == NULL)
509 if (reachable == NULL)
512 goto bail;
510 goto bail;
513
511
514 /* Initialize internal datastructures */
512 /* Initialize internal datastructures */
515 tovisit = (int *)malloc((len + 1) * sizeof(int));
513 tovisit = (int *)malloc((len + 1) * sizeof(int));
516 if (tovisit == NULL) {
514 if (tovisit == NULL) {
517 PyErr_NoMemory();
515 PyErr_NoMemory();
518 goto bail;
516 goto bail;
519 }
517 }
520
518
521 revstates = (char *)calloc(len + 1, 1);
519 revstates = (char *)calloc(len + 1, 1);
522 if (revstates == NULL) {
520 if (revstates == NULL) {
523 PyErr_NoMemory();
521 PyErr_NoMemory();
524 goto bail;
522 goto bail;
525 }
523 }
526
524
527 l = PyList_GET_SIZE(roots);
525 l = PyList_GET_SIZE(roots);
528 for (i = 0; i < l; i++) {
526 for (i = 0; i < l; i++) {
529 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
527 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
530 if (revnum == -1 && PyErr_Occurred())
528 if (revnum == -1 && PyErr_Occurred())
531 goto bail;
529 goto bail;
532 /* If root is out of range, e.g. wdir(), it must be unreachable
530 /* If root is out of range, e.g. wdir(), it must be unreachable
533 * from heads. So we can just ignore it. */
531 * from heads. So we can just ignore it. */
534 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
532 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
535 continue;
533 continue;
536 revstates[revnum + 1] |= RS_ROOT;
534 revstates[revnum + 1] |= RS_ROOT;
537 }
535 }
538
536
539 /* Populate tovisit with all the heads */
537 /* Populate tovisit with all the heads */
540 l = PyList_GET_SIZE(heads);
538 l = PyList_GET_SIZE(heads);
541 for (i = 0; i < l; i++) {
539 for (i = 0; i < l; i++) {
542 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
540 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
543 if (revnum == -1 && PyErr_Occurred())
541 if (revnum == -1 && PyErr_Occurred())
544 goto bail;
542 goto bail;
545 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
543 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
546 PyErr_SetString(PyExc_IndexError, "head out of range");
544 PyErr_SetString(PyExc_IndexError, "head out of range");
547 goto bail;
545 goto bail;
548 }
546 }
549 if (!(revstates[revnum + 1] & RS_SEEN)) {
547 if (!(revstates[revnum + 1] & RS_SEEN)) {
550 tovisit[lentovisit++] = (int)revnum;
548 tovisit[lentovisit++] = (int)revnum;
551 revstates[revnum + 1] |= RS_SEEN;
549 revstates[revnum + 1] |= RS_SEEN;
552 }
550 }
553 }
551 }
554
552
555 /* Visit the tovisit list and find the reachable roots */
553 /* Visit the tovisit list and find the reachable roots */
556 k = 0;
554 k = 0;
557 while (k < lentovisit) {
555 while (k < lentovisit) {
558 /* Add the node to reachable if it is a root*/
556 /* Add the node to reachable if it is a root*/
559 revnum = tovisit[k++];
557 revnum = tovisit[k++];
560 if (revstates[revnum + 1] & RS_ROOT) {
558 if (revstates[revnum + 1] & RS_ROOT) {
561 revstates[revnum + 1] |= RS_REACHABLE;
559 revstates[revnum + 1] |= RS_REACHABLE;
562 val = PyInt_FromLong(revnum);
560 val = PyInt_FromLong(revnum);
563 if (val == NULL)
561 if (val == NULL)
564 goto bail;
562 goto bail;
565 r = PyList_Append(reachable, val);
563 r = PyList_Append(reachable, val);
566 Py_DECREF(val);
564 Py_DECREF(val);
567 if (r < 0)
565 if (r < 0)
568 goto bail;
566 goto bail;
569 if (includepath == 0)
567 if (includepath == 0)
570 continue;
568 continue;
571 }
569 }
572
570
573 /* Add its parents to the list of nodes to visit */
571 /* Add its parents to the list of nodes to visit */
574 if (revnum == -1)
572 if (revnum == -1)
575 continue;
573 continue;
576 r = index_get_parents(self, revnum, parents, (int)len - 1);
574 r = index_get_parents(self, revnum, parents, (int)len - 1);
577 if (r < 0)
575 if (r < 0)
578 goto bail;
576 goto bail;
579 for (i = 0; i < 2; i++) {
577 for (i = 0; i < 2; i++) {
580 if (!(revstates[parents[i] + 1] & RS_SEEN)
578 if (!(revstates[parents[i] + 1] & RS_SEEN)
581 && parents[i] >= minroot) {
579 && parents[i] >= minroot) {
582 tovisit[lentovisit++] = parents[i];
580 tovisit[lentovisit++] = parents[i];
583 revstates[parents[i] + 1] |= RS_SEEN;
581 revstates[parents[i] + 1] |= RS_SEEN;
584 }
582 }
585 }
583 }
586 }
584 }
587
585
588 /* Find all the nodes in between the roots we found and the heads
586 /* Find all the nodes in between the roots we found and the heads
589 * and add them to the reachable set */
587 * and add them to the reachable set */
590 if (includepath == 1) {
588 if (includepath == 1) {
591 long minidx = minroot;
589 long minidx = minroot;
592 if (minidx < 0)
590 if (minidx < 0)
593 minidx = 0;
591 minidx = 0;
594 for (i = minidx; i < len; i++) {
592 for (i = minidx; i < len; i++) {
595 if (!(revstates[i + 1] & RS_SEEN))
593 if (!(revstates[i + 1] & RS_SEEN))
596 continue;
594 continue;
597 r = index_get_parents(self, i, parents, (int)len - 1);
595 r = index_get_parents(self, i, parents, (int)len - 1);
598 /* Corrupted index file, error is set from
596 /* Corrupted index file, error is set from
599 * index_get_parents */
597 * index_get_parents */
600 if (r < 0)
598 if (r < 0)
601 goto bail;
599 goto bail;
602 if (((revstates[parents[0] + 1] |
600 if (((revstates[parents[0] + 1] |
603 revstates[parents[1] + 1]) & RS_REACHABLE)
601 revstates[parents[1] + 1]) & RS_REACHABLE)
604 && !(revstates[i + 1] & RS_REACHABLE)) {
602 && !(revstates[i + 1] & RS_REACHABLE)) {
605 revstates[i + 1] |= RS_REACHABLE;
603 revstates[i + 1] |= RS_REACHABLE;
606 val = PyInt_FromLong(i);
604 val = PyInt_FromLong(i);
607 if (val == NULL)
605 if (val == NULL)
608 goto bail;
606 goto bail;
609 r = PyList_Append(reachable, val);
607 r = PyList_Append(reachable, val);
610 Py_DECREF(val);
608 Py_DECREF(val);
611 if (r < 0)
609 if (r < 0)
612 goto bail;
610 goto bail;
613 }
611 }
614 }
612 }
615 }
613 }
616
614
617 free(revstates);
615 free(revstates);
618 free(tovisit);
616 free(tovisit);
619 return reachable;
617 return reachable;
620 bail:
618 bail:
621 Py_XDECREF(reachable);
619 Py_XDECREF(reachable);
622 free(revstates);
620 free(revstates);
623 free(tovisit);
621 free(tovisit);
624 return NULL;
622 return NULL;
625 }
623 }
626
624
627 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
625 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
628 {
626 {
629 PyObject *roots = Py_None;
627 PyObject *roots = Py_None;
630 PyObject *ret = NULL;
628 PyObject *ret = NULL;
631 PyObject *phasessize = NULL;
629 PyObject *phasessize = NULL;
632 PyObject *phaseroots = NULL;
630 PyObject *phaseroots = NULL;
633 PyObject *phaseset = NULL;
631 PyObject *phaseset = NULL;
634 PyObject *phasessetlist = NULL;
632 PyObject *phasessetlist = NULL;
635 PyObject *rev = NULL;
633 PyObject *rev = NULL;
636 Py_ssize_t len = index_length(self) - 1;
634 Py_ssize_t len = index_length(self) - 1;
637 Py_ssize_t numphase = 0;
635 Py_ssize_t numphase = 0;
638 Py_ssize_t minrevallphases = 0;
636 Py_ssize_t minrevallphases = 0;
639 Py_ssize_t minrevphase = 0;
637 Py_ssize_t minrevphase = 0;
640 Py_ssize_t i = 0;
638 Py_ssize_t i = 0;
641 char *phases = NULL;
639 char *phases = NULL;
642 long phase;
640 long phase;
643
641
644 if (!PyArg_ParseTuple(args, "O", &roots))
642 if (!PyArg_ParseTuple(args, "O", &roots))
645 goto done;
643 goto done;
646 if (roots == NULL || !PyList_Check(roots)) {
644 if (roots == NULL || !PyList_Check(roots)) {
647 PyErr_SetString(PyExc_TypeError, "roots must be a list");
645 PyErr_SetString(PyExc_TypeError, "roots must be a list");
648 goto done;
646 goto done;
649 }
647 }
650
648
651 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
649 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
652 if (phases == NULL) {
650 if (phases == NULL) {
653 PyErr_NoMemory();
651 PyErr_NoMemory();
654 goto done;
652 goto done;
655 }
653 }
656 /* Put the phase information of all the roots in phases */
654 /* Put the phase information of all the roots in phases */
657 numphase = PyList_GET_SIZE(roots)+1;
655 numphase = PyList_GET_SIZE(roots)+1;
658 minrevallphases = len + 1;
656 minrevallphases = len + 1;
659 phasessetlist = PyList_New(numphase);
657 phasessetlist = PyList_New(numphase);
660 if (phasessetlist == NULL)
658 if (phasessetlist == NULL)
661 goto done;
659 goto done;
662
660
663 PyList_SET_ITEM(phasessetlist, 0, Py_None);
661 PyList_SET_ITEM(phasessetlist, 0, Py_None);
664 Py_INCREF(Py_None);
662 Py_INCREF(Py_None);
665
663
666 for (i = 0; i < numphase-1; i++) {
664 for (i = 0; i < numphase-1; i++) {
667 phaseroots = PyList_GET_ITEM(roots, i);
665 phaseroots = PyList_GET_ITEM(roots, i);
668 phaseset = PySet_New(NULL);
666 phaseset = PySet_New(NULL);
669 if (phaseset == NULL)
667 if (phaseset == NULL)
670 goto release;
668 goto release;
671 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
669 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
672 if (!PyList_Check(phaseroots)) {
670 if (!PyList_Check(phaseroots)) {
673 PyErr_SetString(PyExc_TypeError,
671 PyErr_SetString(PyExc_TypeError,
674 "roots item must be a list");
672 "roots item must be a list");
675 goto release;
673 goto release;
676 }
674 }
677 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
675 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
678 if (minrevphase == -2) /* Error from add_roots_get_min */
676 if (minrevphase == -2) /* Error from add_roots_get_min */
679 goto release;
677 goto release;
680 minrevallphases = MIN(minrevallphases, minrevphase);
678 minrevallphases = MIN(minrevallphases, minrevphase);
681 }
679 }
682 /* Propagate the phase information from the roots to the revs */
680 /* Propagate the phase information from the roots to the revs */
683 if (minrevallphases != -1) {
681 if (minrevallphases != -1) {
684 int parents[2];
682 int parents[2];
685 for (i = minrevallphases; i < len; i++) {
683 for (i = minrevallphases; i < len; i++) {
686 if (index_get_parents(self, i, parents,
684 if (index_get_parents(self, i, parents,
687 (int)len - 1) < 0)
685 (int)len - 1) < 0)
688 goto release;
686 goto release;
689 set_phase_from_parents(phases, parents[0], parents[1], i);
687 set_phase_from_parents(phases, parents[0], parents[1], i);
690 }
688 }
691 }
689 }
692 /* Transform phase list to a python list */
690 /* Transform phase list to a python list */
693 phasessize = PyInt_FromLong(len);
691 phasessize = PyInt_FromLong(len);
694 if (phasessize == NULL)
692 if (phasessize == NULL)
695 goto release;
693 goto release;
696 for (i = 0; i < len; i++) {
694 for (i = 0; i < len; i++) {
697 phase = phases[i];
695 phase = phases[i];
698 /* We only store the sets of phase for non public phase, the public phase
696 /* We only store the sets of phase for non public phase, the public phase
699 * is computed as a difference */
697 * is computed as a difference */
700 if (phase != 0) {
698 if (phase != 0) {
701 phaseset = PyList_GET_ITEM(phasessetlist, phase);
699 phaseset = PyList_GET_ITEM(phasessetlist, phase);
702 rev = PyInt_FromLong(i);
700 rev = PyInt_FromLong(i);
703 if (rev == NULL)
701 if (rev == NULL)
704 goto release;
702 goto release;
705 PySet_Add(phaseset, rev);
703 PySet_Add(phaseset, rev);
706 Py_XDECREF(rev);
704 Py_XDECREF(rev);
707 }
705 }
708 }
706 }
709 ret = PyTuple_Pack(2, phasessize, phasessetlist);
707 ret = PyTuple_Pack(2, phasessize, phasessetlist);
710
708
711 release:
709 release:
712 Py_XDECREF(phasessize);
710 Py_XDECREF(phasessize);
713 Py_XDECREF(phasessetlist);
711 Py_XDECREF(phasessetlist);
714 done:
712 done:
715 free(phases);
713 free(phases);
716 return ret;
714 return ret;
717 }
715 }
718
716
719 static PyObject *index_headrevs(indexObject *self, PyObject *args)
717 static PyObject *index_headrevs(indexObject *self, PyObject *args)
720 {
718 {
721 Py_ssize_t i, j, len;
719 Py_ssize_t i, j, len;
722 char *nothead = NULL;
720 char *nothead = NULL;
723 PyObject *heads = NULL;
721 PyObject *heads = NULL;
724 PyObject *filter = NULL;
722 PyObject *filter = NULL;
725 PyObject *filteredrevs = Py_None;
723 PyObject *filteredrevs = Py_None;
726
724
727 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
725 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
728 return NULL;
726 return NULL;
729 }
727 }
730
728
731 if (self->headrevs && filteredrevs == self->filteredrevs)
729 if (self->headrevs && filteredrevs == self->filteredrevs)
732 return list_copy(self->headrevs);
730 return list_copy(self->headrevs);
733
731
734 Py_DECREF(self->filteredrevs);
732 Py_DECREF(self->filteredrevs);
735 self->filteredrevs = filteredrevs;
733 self->filteredrevs = filteredrevs;
736 Py_INCREF(filteredrevs);
734 Py_INCREF(filteredrevs);
737
735
738 if (filteredrevs != Py_None) {
736 if (filteredrevs != Py_None) {
739 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
737 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
740 if (!filter) {
738 if (!filter) {
741 PyErr_SetString(PyExc_TypeError,
739 PyErr_SetString(PyExc_TypeError,
742 "filteredrevs has no attribute __contains__");
740 "filteredrevs has no attribute __contains__");
743 goto bail;
741 goto bail;
744 }
742 }
745 }
743 }
746
744
747 len = index_length(self) - 1;
745 len = index_length(self) - 1;
748 heads = PyList_New(0);
746 heads = PyList_New(0);
749 if (heads == NULL)
747 if (heads == NULL)
750 goto bail;
748 goto bail;
751 if (len == 0) {
749 if (len == 0) {
752 PyObject *nullid = PyInt_FromLong(-1);
750 PyObject *nullid = PyInt_FromLong(-1);
753 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
751 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
754 Py_XDECREF(nullid);
752 Py_XDECREF(nullid);
755 goto bail;
753 goto bail;
756 }
754 }
757 goto done;
755 goto done;
758 }
756 }
759
757
760 nothead = calloc(len, 1);
758 nothead = calloc(len, 1);
761 if (nothead == NULL) {
759 if (nothead == NULL) {
762 PyErr_NoMemory();
760 PyErr_NoMemory();
763 goto bail;
761 goto bail;
764 }
762 }
765
763
766 for (i = len - 1; i >= 0; i--) {
764 for (i = len - 1; i >= 0; i--) {
767 int isfiltered;
765 int isfiltered;
768 int parents[2];
766 int parents[2];
769
767
770 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
768 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
771 * node already, and therefore this node is not filtered. So we can skip
769 * node already, and therefore this node is not filtered. So we can skip
772 * the expensive check_filter step.
770 * the expensive check_filter step.
773 */
771 */
774 if (nothead[i] != 1) {
772 if (nothead[i] != 1) {
775 isfiltered = check_filter(filter, i);
773 isfiltered = check_filter(filter, i);
776 if (isfiltered == -1) {
774 if (isfiltered == -1) {
777 PyErr_SetString(PyExc_TypeError,
775 PyErr_SetString(PyExc_TypeError,
778 "unable to check filter");
776 "unable to check filter");
779 goto bail;
777 goto bail;
780 }
778 }
781
779
782 if (isfiltered) {
780 if (isfiltered) {
783 nothead[i] = 1;
781 nothead[i] = 1;
784 continue;
782 continue;
785 }
783 }
786 }
784 }
787
785
788 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
786 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
789 goto bail;
787 goto bail;
790 for (j = 0; j < 2; j++) {
788 for (j = 0; j < 2; j++) {
791 if (parents[j] >= 0)
789 if (parents[j] >= 0)
792 nothead[parents[j]] = 1;
790 nothead[parents[j]] = 1;
793 }
791 }
794 }
792 }
795
793
796 for (i = 0; i < len; i++) {
794 for (i = 0; i < len; i++) {
797 PyObject *head;
795 PyObject *head;
798
796
799 if (nothead[i])
797 if (nothead[i])
800 continue;
798 continue;
801 head = PyInt_FromSsize_t(i);
799 head = PyInt_FromSsize_t(i);
802 if (head == NULL || PyList_Append(heads, head) == -1) {
800 if (head == NULL || PyList_Append(heads, head) == -1) {
803 Py_XDECREF(head);
801 Py_XDECREF(head);
804 goto bail;
802 goto bail;
805 }
803 }
806 }
804 }
807
805
808 done:
806 done:
809 self->headrevs = heads;
807 self->headrevs = heads;
810 Py_XDECREF(filter);
808 Py_XDECREF(filter);
811 free(nothead);
809 free(nothead);
812 return list_copy(self->headrevs);
810 return list_copy(self->headrevs);
813 bail:
811 bail:
814 Py_XDECREF(filter);
812 Py_XDECREF(filter);
815 Py_XDECREF(heads);
813 Py_XDECREF(heads);
816 free(nothead);
814 free(nothead);
817 return NULL;
815 return NULL;
818 }
816 }
819
817
820 /**
818 /**
821 * Obtain the base revision index entry.
819 * Obtain the base revision index entry.
822 *
820 *
823 * Callers must ensure that rev >= 0 or illegal memory access may occur.
821 * Callers must ensure that rev >= 0 or illegal memory access may occur.
824 */
822 */
825 static inline int index_baserev(indexObject *self, int rev)
823 static inline int index_baserev(indexObject *self, int rev)
826 {
824 {
827 const char *data;
825 const char *data;
828
826
829 if (rev >= self->length - 1) {
827 if (rev >= self->length - 1) {
830 PyObject *tuple = PyList_GET_ITEM(self->added,
828 PyObject *tuple = PyList_GET_ITEM(self->added,
831 rev - self->length + 1);
829 rev - self->length + 1);
832 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
830 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
833 }
831 }
834 else {
832 else {
835 data = index_deref(self, rev);
833 data = index_deref(self, rev);
836 if (data == NULL) {
834 if (data == NULL) {
837 return -2;
835 return -2;
838 }
836 }
839
837
840 return getbe32(data + 16);
838 return getbe32(data + 16);
841 }
839 }
842 }
840 }
843
841
844 static PyObject *index_deltachain(indexObject *self, PyObject *args)
842 static PyObject *index_deltachain(indexObject *self, PyObject *args)
845 {
843 {
846 int rev, generaldelta;
844 int rev, generaldelta;
847 PyObject *stoparg;
845 PyObject *stoparg;
848 int stoprev, iterrev, baserev = -1;
846 int stoprev, iterrev, baserev = -1;
849 int stopped;
847 int stopped;
850 PyObject *chain = NULL, *result = NULL;
848 PyObject *chain = NULL, *result = NULL;
851 const Py_ssize_t length = index_length(self);
849 const Py_ssize_t length = index_length(self);
852
850
853 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
851 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
854 return NULL;
852 return NULL;
855 }
853 }
856
854
857 if (PyInt_Check(stoparg)) {
855 if (PyInt_Check(stoparg)) {
858 stoprev = (int)PyInt_AsLong(stoparg);
856 stoprev = (int)PyInt_AsLong(stoparg);
859 if (stoprev == -1 && PyErr_Occurred()) {
857 if (stoprev == -1 && PyErr_Occurred()) {
860 return NULL;
858 return NULL;
861 }
859 }
862 }
860 }
863 else if (stoparg == Py_None) {
861 else if (stoparg == Py_None) {
864 stoprev = -2;
862 stoprev = -2;
865 }
863 }
866 else {
864 else {
867 PyErr_SetString(PyExc_ValueError,
865 PyErr_SetString(PyExc_ValueError,
868 "stoprev must be integer or None");
866 "stoprev must be integer or None");
869 return NULL;
867 return NULL;
870 }
868 }
871
869
872 if (rev < 0 || rev >= length - 1) {
870 if (rev < 0 || rev >= length - 1) {
873 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
871 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
874 return NULL;
872 return NULL;
875 }
873 }
876
874
877 chain = PyList_New(0);
875 chain = PyList_New(0);
878 if (chain == NULL) {
876 if (chain == NULL) {
879 return NULL;
877 return NULL;
880 }
878 }
881
879
882 baserev = index_baserev(self, rev);
880 baserev = index_baserev(self, rev);
883
881
884 /* This should never happen. */
882 /* This should never happen. */
885 if (baserev <= -2) {
883 if (baserev <= -2) {
886 /* Error should be set by index_deref() */
884 /* Error should be set by index_deref() */
887 assert(PyErr_Occurred());
885 assert(PyErr_Occurred());
888 goto bail;
886 goto bail;
889 }
887 }
890
888
891 iterrev = rev;
889 iterrev = rev;
892
890
893 while (iterrev != baserev && iterrev != stoprev) {
891 while (iterrev != baserev && iterrev != stoprev) {
894 PyObject *value = PyInt_FromLong(iterrev);
892 PyObject *value = PyInt_FromLong(iterrev);
895 if (value == NULL) {
893 if (value == NULL) {
896 goto bail;
894 goto bail;
897 }
895 }
898 if (PyList_Append(chain, value)) {
896 if (PyList_Append(chain, value)) {
899 Py_DECREF(value);
897 Py_DECREF(value);
900 goto bail;
898 goto bail;
901 }
899 }
902 Py_DECREF(value);
900 Py_DECREF(value);
903
901
904 if (generaldelta) {
902 if (generaldelta) {
905 iterrev = baserev;
903 iterrev = baserev;
906 }
904 }
907 else {
905 else {
908 iterrev--;
906 iterrev--;
909 }
907 }
910
908
911 if (iterrev < 0) {
909 if (iterrev < 0) {
912 break;
910 break;
913 }
911 }
914
912
915 if (iterrev >= length - 1) {
913 if (iterrev >= length - 1) {
916 PyErr_SetString(PyExc_IndexError, "revision outside index");
914 PyErr_SetString(PyExc_IndexError, "revision outside index");
917 return NULL;
915 return NULL;
918 }
916 }
919
917
920 baserev = index_baserev(self, iterrev);
918 baserev = index_baserev(self, iterrev);
921
919
922 /* This should never happen. */
920 /* This should never happen. */
923 if (baserev <= -2) {
921 if (baserev <= -2) {
924 /* Error should be set by index_deref() */
922 /* Error should be set by index_deref() */
925 assert(PyErr_Occurred());
923 assert(PyErr_Occurred());
926 goto bail;
924 goto bail;
927 }
925 }
928 }
926 }
929
927
930 if (iterrev == stoprev) {
928 if (iterrev == stoprev) {
931 stopped = 1;
929 stopped = 1;
932 }
930 }
933 else {
931 else {
934 PyObject *value = PyInt_FromLong(iterrev);
932 PyObject *value = PyInt_FromLong(iterrev);
935 if (value == NULL) {
933 if (value == NULL) {
936 goto bail;
934 goto bail;
937 }
935 }
938 if (PyList_Append(chain, value)) {
936 if (PyList_Append(chain, value)) {
939 Py_DECREF(value);
937 Py_DECREF(value);
940 goto bail;
938 goto bail;
941 }
939 }
942 Py_DECREF(value);
940 Py_DECREF(value);
943
941
944 stopped = 0;
942 stopped = 0;
945 }
943 }
946
944
947 if (PyList_Reverse(chain)) {
945 if (PyList_Reverse(chain)) {
948 goto bail;
946 goto bail;
949 }
947 }
950
948
951 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
949 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
952 Py_DECREF(chain);
950 Py_DECREF(chain);
953 return result;
951 return result;
954
952
955 bail:
953 bail:
956 Py_DECREF(chain);
954 Py_DECREF(chain);
957 return NULL;
955 return NULL;
958 }
956 }
959
957
960 static inline int nt_level(const char *node, Py_ssize_t level)
958 static inline int nt_level(const char *node, Py_ssize_t level)
961 {
959 {
962 int v = node[level>>1];
960 int v = node[level>>1];
963 if (!(level & 1))
961 if (!(level & 1))
964 v >>= 4;
962 v >>= 4;
965 return v & 0xf;
963 return v & 0xf;
966 }
964 }
967
965
968 /*
966 /*
969 * Return values:
967 * Return values:
970 *
968 *
971 * -4: match is ambiguous (multiple candidates)
969 * -4: match is ambiguous (multiple candidates)
972 * -2: not found
970 * -2: not found
973 * rest: valid rev
971 * rest: valid rev
974 */
972 */
975 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
973 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
976 int hex)
974 int hex)
977 {
975 {
978 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
976 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
979 int level, maxlevel, off;
977 int level, maxlevel, off;
980
978
981 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
979 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
982 return -1;
980 return -1;
983
981
984 if (self->nt == NULL)
982 if (self->nt == NULL)
985 return -2;
983 return -2;
986
984
987 if (hex)
985 if (hex)
988 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
986 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
989 else
987 else
990 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
988 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
991
989
992 for (level = off = 0; level < maxlevel; level++) {
990 for (level = off = 0; level < maxlevel; level++) {
993 int k = getnybble(node, level);
991 int k = getnybble(node, level);
994 nodetree *n = &self->nt[off];
992 nodetree *n = &self->nt[off];
995 int v = n->children[k];
993 int v = n->children[k];
996
994
997 if (v < 0) {
995 if (v < 0) {
998 const char *n;
996 const char *n;
999 Py_ssize_t i;
997 Py_ssize_t i;
1000
998
1001 v = -(v + 1);
999 v = -(v + 1);
1002 n = index_node(self, v);
1000 n = index_node(self, v);
1003 if (n == NULL)
1001 if (n == NULL)
1004 return -2;
1002 return -2;
1005 for (i = level; i < maxlevel; i++)
1003 for (i = level; i < maxlevel; i++)
1006 if (getnybble(node, i) != nt_level(n, i))
1004 if (getnybble(node, i) != nt_level(n, i))
1007 return -2;
1005 return -2;
1008 return v;
1006 return v;
1009 }
1007 }
1010 if (v == 0)
1008 if (v == 0)
1011 return -2;
1009 return -2;
1012 off = v;
1010 off = v;
1013 }
1011 }
1014 /* multiple matches against an ambiguous prefix */
1012 /* multiple matches against an ambiguous prefix */
1015 return -4;
1013 return -4;
1016 }
1014 }
1017
1015
1018 static int nt_new(indexObject *self)
1016 static int nt_new(indexObject *self)
1019 {
1017 {
1020 if (self->ntlength == self->ntcapacity) {
1018 if (self->ntlength == self->ntcapacity) {
1021 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1019 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1022 PyErr_SetString(PyExc_MemoryError,
1020 PyErr_SetString(PyExc_MemoryError,
1023 "overflow in nt_new");
1021 "overflow in nt_new");
1024 return -1;
1022 return -1;
1025 }
1023 }
1026 self->ntcapacity *= 2;
1024 self->ntcapacity *= 2;
1027 self->nt = realloc(self->nt,
1025 self->nt = realloc(self->nt,
1028 self->ntcapacity * sizeof(nodetree));
1026 self->ntcapacity * sizeof(nodetree));
1029 if (self->nt == NULL) {
1027 if (self->nt == NULL) {
1030 PyErr_SetString(PyExc_MemoryError, "out of memory");
1028 PyErr_SetString(PyExc_MemoryError, "out of memory");
1031 return -1;
1029 return -1;
1032 }
1030 }
1033 memset(&self->nt[self->ntlength], 0,
1031 memset(&self->nt[self->ntlength], 0,
1034 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1032 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1035 }
1033 }
1036 return self->ntlength++;
1034 return self->ntlength++;
1037 }
1035 }
1038
1036
1039 static int nt_insert(indexObject *self, const char *node, int rev)
1037 static int nt_insert(indexObject *self, const char *node, int rev)
1040 {
1038 {
1041 int level = 0;
1039 int level = 0;
1042 int off = 0;
1040 int off = 0;
1043
1041
1044 while (level < 40) {
1042 while (level < 40) {
1045 int k = nt_level(node, level);
1043 int k = nt_level(node, level);
1046 nodetree *n;
1044 nodetree *n;
1047 int v;
1045 int v;
1048
1046
1049 n = &self->nt[off];
1047 n = &self->nt[off];
1050 v = n->children[k];
1048 v = n->children[k];
1051
1049
1052 if (v == 0) {
1050 if (v == 0) {
1053 n->children[k] = -rev - 1;
1051 n->children[k] = -rev - 1;
1054 return 0;
1052 return 0;
1055 }
1053 }
1056 if (v < 0) {
1054 if (v < 0) {
1057 const char *oldnode = index_node(self, -(v + 1));
1055 const char *oldnode = index_node(self, -(v + 1));
1058 int noff;
1056 int noff;
1059
1057
1060 if (!oldnode || !memcmp(oldnode, node, 20)) {
1058 if (!oldnode || !memcmp(oldnode, node, 20)) {
1061 n->children[k] = -rev - 1;
1059 n->children[k] = -rev - 1;
1062 return 0;
1060 return 0;
1063 }
1061 }
1064 noff = nt_new(self);
1062 noff = nt_new(self);
1065 if (noff == -1)
1063 if (noff == -1)
1066 return -1;
1064 return -1;
1067 /* self->nt may have been changed by realloc */
1065 /* self->nt may have been changed by realloc */
1068 self->nt[off].children[k] = noff;
1066 self->nt[off].children[k] = noff;
1069 off = noff;
1067 off = noff;
1070 n = &self->nt[off];
1068 n = &self->nt[off];
1071 n->children[nt_level(oldnode, ++level)] = v;
1069 n->children[nt_level(oldnode, ++level)] = v;
1072 if (level > self->ntdepth)
1070 if (level > self->ntdepth)
1073 self->ntdepth = level;
1071 self->ntdepth = level;
1074 self->ntsplits += 1;
1072 self->ntsplits += 1;
1075 } else {
1073 } else {
1076 level += 1;
1074 level += 1;
1077 off = v;
1075 off = v;
1078 }
1076 }
1079 }
1077 }
1080
1078
1081 return -1;
1079 return -1;
1082 }
1080 }
1083
1081
1084 static int nt_init(indexObject *self)
1082 static int nt_init(indexObject *self)
1085 {
1083 {
1086 if (self->nt == NULL) {
1084 if (self->nt == NULL) {
1087 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1085 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1088 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1086 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1089 return -1;
1087 return -1;
1090 }
1088 }
1091 self->ntcapacity = self->raw_length < 4
1089 self->ntcapacity = self->raw_length < 4
1092 ? 4 : (int)self->raw_length / 2;
1090 ? 4 : (int)self->raw_length / 2;
1093
1091
1094 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1092 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1095 if (self->nt == NULL) {
1093 if (self->nt == NULL) {
1096 PyErr_NoMemory();
1094 PyErr_NoMemory();
1097 return -1;
1095 return -1;
1098 }
1096 }
1099 self->ntlength = 1;
1097 self->ntlength = 1;
1100 self->ntrev = (int)index_length(self) - 1;
1098 self->ntrev = (int)index_length(self) - 1;
1101 self->ntlookups = 1;
1099 self->ntlookups = 1;
1102 self->ntmisses = 0;
1100 self->ntmisses = 0;
1103 if (nt_insert(self, nullid, INT_MAX) == -1)
1101 if (nt_insert(self, nullid, INT_MAX) == -1)
1104 return -1;
1102 return -1;
1105 }
1103 }
1106 return 0;
1104 return 0;
1107 }
1105 }
1108
1106
1109 /*
1107 /*
1110 * Return values:
1108 * Return values:
1111 *
1109 *
1112 * -3: error (exception set)
1110 * -3: error (exception set)
1113 * -2: not found (no exception set)
1111 * -2: not found (no exception set)
1114 * rest: valid rev
1112 * rest: valid rev
1115 */
1113 */
1116 static int index_find_node(indexObject *self,
1114 static int index_find_node(indexObject *self,
1117 const char *node, Py_ssize_t nodelen)
1115 const char *node, Py_ssize_t nodelen)
1118 {
1116 {
1119 int rev;
1117 int rev;
1120
1118
1121 self->ntlookups++;
1119 self->ntlookups++;
1122 rev = nt_find(self, node, nodelen, 0);
1120 rev = nt_find(self, node, nodelen, 0);
1123 if (rev >= -1)
1121 if (rev >= -1)
1124 return rev;
1122 return rev;
1125
1123
1126 if (nt_init(self) == -1)
1124 if (nt_init(self) == -1)
1127 return -3;
1125 return -3;
1128
1126
1129 /*
1127 /*
1130 * For the first handful of lookups, we scan the entire index,
1128 * For the first handful of lookups, we scan the entire index,
1131 * and cache only the matching nodes. This optimizes for cases
1129 * and cache only the matching nodes. This optimizes for cases
1132 * like "hg tip", where only a few nodes are accessed.
1130 * like "hg tip", where only a few nodes are accessed.
1133 *
1131 *
1134 * After that, we cache every node we visit, using a single
1132 * After that, we cache every node we visit, using a single
1135 * scan amortized over multiple lookups. This gives the best
1133 * scan amortized over multiple lookups. This gives the best
1136 * bulk performance, e.g. for "hg log".
1134 * bulk performance, e.g. for "hg log".
1137 */
1135 */
1138 if (self->ntmisses++ < 4) {
1136 if (self->ntmisses++ < 4) {
1139 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1137 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1140 const char *n = index_node(self, rev);
1138 const char *n = index_node(self, rev);
1141 if (n == NULL)
1139 if (n == NULL)
1142 return -2;
1140 return -2;
1143 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1141 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1144 if (nt_insert(self, n, rev) == -1)
1142 if (nt_insert(self, n, rev) == -1)
1145 return -3;
1143 return -3;
1146 break;
1144 break;
1147 }
1145 }
1148 }
1146 }
1149 } else {
1147 } else {
1150 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1148 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1151 const char *n = index_node(self, rev);
1149 const char *n = index_node(self, rev);
1152 if (n == NULL) {
1150 if (n == NULL) {
1153 self->ntrev = rev + 1;
1151 self->ntrev = rev + 1;
1154 return -2;
1152 return -2;
1155 }
1153 }
1156 if (nt_insert(self, n, rev) == -1) {
1154 if (nt_insert(self, n, rev) == -1) {
1157 self->ntrev = rev + 1;
1155 self->ntrev = rev + 1;
1158 return -3;
1156 return -3;
1159 }
1157 }
1160 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1158 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1161 break;
1159 break;
1162 }
1160 }
1163 }
1161 }
1164 self->ntrev = rev;
1162 self->ntrev = rev;
1165 }
1163 }
1166
1164
1167 if (rev >= 0)
1165 if (rev >= 0)
1168 return rev;
1166 return rev;
1169 return -2;
1167 return -2;
1170 }
1168 }
1171
1169
1172 static void raise_revlog_error(void)
1170 static void raise_revlog_error(void)
1173 {
1171 {
1174 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1172 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1175
1173
1176 mod = PyImport_ImportModule("mercurial.error");
1174 mod = PyImport_ImportModule("mercurial.error");
1177 if (mod == NULL) {
1175 if (mod == NULL) {
1178 goto cleanup;
1176 goto cleanup;
1179 }
1177 }
1180
1178
1181 dict = PyModule_GetDict(mod);
1179 dict = PyModule_GetDict(mod);
1182 if (dict == NULL) {
1180 if (dict == NULL) {
1183 goto cleanup;
1181 goto cleanup;
1184 }
1182 }
1185 Py_INCREF(dict);
1183 Py_INCREF(dict);
1186
1184
1187 errclass = PyDict_GetItemString(dict, "RevlogError");
1185 errclass = PyDict_GetItemString(dict, "RevlogError");
1188 if (errclass == NULL) {
1186 if (errclass == NULL) {
1189 PyErr_SetString(PyExc_SystemError,
1187 PyErr_SetString(PyExc_SystemError,
1190 "could not find RevlogError");
1188 "could not find RevlogError");
1191 goto cleanup;
1189 goto cleanup;
1192 }
1190 }
1193
1191
1194 /* value of exception is ignored by callers */
1192 /* value of exception is ignored by callers */
1195 PyErr_SetString(errclass, "RevlogError");
1193 PyErr_SetString(errclass, "RevlogError");
1196
1194
1197 cleanup:
1195 cleanup:
1198 Py_XDECREF(dict);
1196 Py_XDECREF(dict);
1199 Py_XDECREF(mod);
1197 Py_XDECREF(mod);
1200 }
1198 }
1201
1199
1202 static PyObject *index_getitem(indexObject *self, PyObject *value)
1200 static PyObject *index_getitem(indexObject *self, PyObject *value)
1203 {
1201 {
1204 char *node;
1202 char *node;
1205 Py_ssize_t nodelen;
1203 Py_ssize_t nodelen;
1206 int rev;
1204 int rev;
1207
1205
1208 if (PyInt_Check(value))
1206 if (PyInt_Check(value))
1209 return index_get(self, PyInt_AS_LONG(value));
1207 return index_get(self, PyInt_AS_LONG(value));
1210
1208
1211 if (node_check(value, &node, &nodelen) == -1)
1209 if (node_check(value, &node, &nodelen) == -1)
1212 return NULL;
1210 return NULL;
1213 rev = index_find_node(self, node, nodelen);
1211 rev = index_find_node(self, node, nodelen);
1214 if (rev >= -1)
1212 if (rev >= -1)
1215 return PyInt_FromLong(rev);
1213 return PyInt_FromLong(rev);
1216 if (rev == -2)
1214 if (rev == -2)
1217 raise_revlog_error();
1215 raise_revlog_error();
1218 return NULL;
1216 return NULL;
1219 }
1217 }
1220
1218
1221 static int nt_partialmatch(indexObject *self, const char *node,
1219 static int nt_partialmatch(indexObject *self, const char *node,
1222 Py_ssize_t nodelen)
1220 Py_ssize_t nodelen)
1223 {
1221 {
1224 int rev;
1222 int rev;
1225
1223
1226 if (nt_init(self) == -1)
1224 if (nt_init(self) == -1)
1227 return -3;
1225 return -3;
1228
1226
1229 if (self->ntrev > 0) {
1227 if (self->ntrev > 0) {
1230 /* ensure that the radix tree is fully populated */
1228 /* ensure that the radix tree is fully populated */
1231 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1229 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1232 const char *n = index_node(self, rev);
1230 const char *n = index_node(self, rev);
1233 if (n == NULL)
1231 if (n == NULL)
1234 return -2;
1232 return -2;
1235 if (nt_insert(self, n, rev) == -1)
1233 if (nt_insert(self, n, rev) == -1)
1236 return -3;
1234 return -3;
1237 }
1235 }
1238 self->ntrev = rev;
1236 self->ntrev = rev;
1239 }
1237 }
1240
1238
1241 return nt_find(self, node, nodelen, 1);
1239 return nt_find(self, node, nodelen, 1);
1242 }
1240 }
1243
1241
1244 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1242 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1245 {
1243 {
1246 const char *fullnode;
1244 const char *fullnode;
1247 int nodelen;
1245 int nodelen;
1248 char *node;
1246 char *node;
1249 int rev, i;
1247 int rev, i;
1250
1248
1251 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1249 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1252 return NULL;
1250 return NULL;
1253
1251
1254 if (nodelen < 4) {
1252 if (nodelen < 4) {
1255 PyErr_SetString(PyExc_ValueError, "key too short");
1253 PyErr_SetString(PyExc_ValueError, "key too short");
1256 return NULL;
1254 return NULL;
1257 }
1255 }
1258
1256
1259 if (nodelen > 40) {
1257 if (nodelen > 40) {
1260 PyErr_SetString(PyExc_ValueError, "key too long");
1258 PyErr_SetString(PyExc_ValueError, "key too long");
1261 return NULL;
1259 return NULL;
1262 }
1260 }
1263
1261
1264 for (i = 0; i < nodelen; i++)
1262 for (i = 0; i < nodelen; i++)
1265 hexdigit(node, i);
1263 hexdigit(node, i);
1266 if (PyErr_Occurred()) {
1264 if (PyErr_Occurred()) {
1267 /* input contains non-hex characters */
1265 /* input contains non-hex characters */
1268 PyErr_Clear();
1266 PyErr_Clear();
1269 Py_RETURN_NONE;
1267 Py_RETURN_NONE;
1270 }
1268 }
1271
1269
1272 rev = nt_partialmatch(self, node, nodelen);
1270 rev = nt_partialmatch(self, node, nodelen);
1273
1271
1274 switch (rev) {
1272 switch (rev) {
1275 case -4:
1273 case -4:
1276 raise_revlog_error();
1274 raise_revlog_error();
1277 case -3:
1275 case -3:
1278 return NULL;
1276 return NULL;
1279 case -2:
1277 case -2:
1280 Py_RETURN_NONE;
1278 Py_RETURN_NONE;
1281 case -1:
1279 case -1:
1282 return PyBytes_FromStringAndSize(nullid, 20);
1280 return PyBytes_FromStringAndSize(nullid, 20);
1283 }
1281 }
1284
1282
1285 fullnode = index_node(self, rev);
1283 fullnode = index_node(self, rev);
1286 if (fullnode == NULL) {
1284 if (fullnode == NULL) {
1287 PyErr_Format(PyExc_IndexError,
1285 PyErr_Format(PyExc_IndexError,
1288 "could not access rev %d", rev);
1286 "could not access rev %d", rev);
1289 return NULL;
1287 return NULL;
1290 }
1288 }
1291 return PyBytes_FromStringAndSize(fullnode, 20);
1289 return PyBytes_FromStringAndSize(fullnode, 20);
1292 }
1290 }
1293
1291
1294 static PyObject *index_m_get(indexObject *self, PyObject *args)
1292 static PyObject *index_m_get(indexObject *self, PyObject *args)
1295 {
1293 {
1296 Py_ssize_t nodelen;
1294 Py_ssize_t nodelen;
1297 PyObject *val;
1295 PyObject *val;
1298 char *node;
1296 char *node;
1299 int rev;
1297 int rev;
1300
1298
1301 if (!PyArg_ParseTuple(args, "O", &val))
1299 if (!PyArg_ParseTuple(args, "O", &val))
1302 return NULL;
1300 return NULL;
1303 if (node_check(val, &node, &nodelen) == -1)
1301 if (node_check(val, &node, &nodelen) == -1)
1304 return NULL;
1302 return NULL;
1305 rev = index_find_node(self, node, nodelen);
1303 rev = index_find_node(self, node, nodelen);
1306 if (rev == -3)
1304 if (rev == -3)
1307 return NULL;
1305 return NULL;
1308 if (rev == -2)
1306 if (rev == -2)
1309 Py_RETURN_NONE;
1307 Py_RETURN_NONE;
1310 return PyInt_FromLong(rev);
1308 return PyInt_FromLong(rev);
1311 }
1309 }
1312
1310
1313 static int index_contains(indexObject *self, PyObject *value)
1311 static int index_contains(indexObject *self, PyObject *value)
1314 {
1312 {
1315 char *node;
1313 char *node;
1316 Py_ssize_t nodelen;
1314 Py_ssize_t nodelen;
1317
1315
1318 if (PyInt_Check(value)) {
1316 if (PyInt_Check(value)) {
1319 long rev = PyInt_AS_LONG(value);
1317 long rev = PyInt_AS_LONG(value);
1320 return rev >= -1 && rev < index_length(self);
1318 return rev >= -1 && rev < index_length(self);
1321 }
1319 }
1322
1320
1323 if (node_check(value, &node, &nodelen) == -1)
1321 if (node_check(value, &node, &nodelen) == -1)
1324 return -1;
1322 return -1;
1325
1323
1326 switch (index_find_node(self, node, nodelen)) {
1324 switch (index_find_node(self, node, nodelen)) {
1327 case -3:
1325 case -3:
1328 return -1;
1326 return -1;
1329 case -2:
1327 case -2:
1330 return 0;
1328 return 0;
1331 default:
1329 default:
1332 return 1;
1330 return 1;
1333 }
1331 }
1334 }
1332 }
1335
1333
1336 typedef uint64_t bitmask;
1334 typedef uint64_t bitmask;
1337
1335
1338 /*
1336 /*
1339 * Given a disjoint set of revs, return all candidates for the
1337 * Given a disjoint set of revs, return all candidates for the
1340 * greatest common ancestor. In revset notation, this is the set
1338 * greatest common ancestor. In revset notation, this is the set
1341 * "heads(::a and ::b and ...)"
1339 * "heads(::a and ::b and ...)"
1342 */
1340 */
1343 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1341 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1344 int revcount)
1342 int revcount)
1345 {
1343 {
1346 const bitmask allseen = (1ull << revcount) - 1;
1344 const bitmask allseen = (1ull << revcount) - 1;
1347 const bitmask poison = 1ull << revcount;
1345 const bitmask poison = 1ull << revcount;
1348 PyObject *gca = PyList_New(0);
1346 PyObject *gca = PyList_New(0);
1349 int i, v, interesting;
1347 int i, v, interesting;
1350 int maxrev = -1;
1348 int maxrev = -1;
1351 bitmask sp;
1349 bitmask sp;
1352 bitmask *seen;
1350 bitmask *seen;
1353
1351
1354 if (gca == NULL)
1352 if (gca == NULL)
1355 return PyErr_NoMemory();
1353 return PyErr_NoMemory();
1356
1354
1357 for (i = 0; i < revcount; i++) {
1355 for (i = 0; i < revcount; i++) {
1358 if (revs[i] > maxrev)
1356 if (revs[i] > maxrev)
1359 maxrev = revs[i];
1357 maxrev = revs[i];
1360 }
1358 }
1361
1359
1362 seen = calloc(sizeof(*seen), maxrev + 1);
1360 seen = calloc(sizeof(*seen), maxrev + 1);
1363 if (seen == NULL) {
1361 if (seen == NULL) {
1364 Py_DECREF(gca);
1362 Py_DECREF(gca);
1365 return PyErr_NoMemory();
1363 return PyErr_NoMemory();
1366 }
1364 }
1367
1365
1368 for (i = 0; i < revcount; i++)
1366 for (i = 0; i < revcount; i++)
1369 seen[revs[i]] = 1ull << i;
1367 seen[revs[i]] = 1ull << i;
1370
1368
1371 interesting = revcount;
1369 interesting = revcount;
1372
1370
1373 for (v = maxrev; v >= 0 && interesting; v--) {
1371 for (v = maxrev; v >= 0 && interesting; v--) {
1374 bitmask sv = seen[v];
1372 bitmask sv = seen[v];
1375 int parents[2];
1373 int parents[2];
1376
1374
1377 if (!sv)
1375 if (!sv)
1378 continue;
1376 continue;
1379
1377
1380 if (sv < poison) {
1378 if (sv < poison) {
1381 interesting -= 1;
1379 interesting -= 1;
1382 if (sv == allseen) {
1380 if (sv == allseen) {
1383 PyObject *obj = PyInt_FromLong(v);
1381 PyObject *obj = PyInt_FromLong(v);
1384 if (obj == NULL)
1382 if (obj == NULL)
1385 goto bail;
1383 goto bail;
1386 if (PyList_Append(gca, obj) == -1) {
1384 if (PyList_Append(gca, obj) == -1) {
1387 Py_DECREF(obj);
1385 Py_DECREF(obj);
1388 goto bail;
1386 goto bail;
1389 }
1387 }
1390 sv |= poison;
1388 sv |= poison;
1391 for (i = 0; i < revcount; i++) {
1389 for (i = 0; i < revcount; i++) {
1392 if (revs[i] == v)
1390 if (revs[i] == v)
1393 goto done;
1391 goto done;
1394 }
1392 }
1395 }
1393 }
1396 }
1394 }
1397 if (index_get_parents(self, v, parents, maxrev) < 0)
1395 if (index_get_parents(self, v, parents, maxrev) < 0)
1398 goto bail;
1396 goto bail;
1399
1397
1400 for (i = 0; i < 2; i++) {
1398 for (i = 0; i < 2; i++) {
1401 int p = parents[i];
1399 int p = parents[i];
1402 if (p == -1)
1400 if (p == -1)
1403 continue;
1401 continue;
1404 sp = seen[p];
1402 sp = seen[p];
1405 if (sv < poison) {
1403 if (sv < poison) {
1406 if (sp == 0) {
1404 if (sp == 0) {
1407 seen[p] = sv;
1405 seen[p] = sv;
1408 interesting++;
1406 interesting++;
1409 }
1407 }
1410 else if (sp != sv)
1408 else if (sp != sv)
1411 seen[p] |= sv;
1409 seen[p] |= sv;
1412 } else {
1410 } else {
1413 if (sp && sp < poison)
1411 if (sp && sp < poison)
1414 interesting--;
1412 interesting--;
1415 seen[p] = sv;
1413 seen[p] = sv;
1416 }
1414 }
1417 }
1415 }
1418 }
1416 }
1419
1417
1420 done:
1418 done:
1421 free(seen);
1419 free(seen);
1422 return gca;
1420 return gca;
1423 bail:
1421 bail:
1424 free(seen);
1422 free(seen);
1425 Py_XDECREF(gca);
1423 Py_XDECREF(gca);
1426 return NULL;
1424 return NULL;
1427 }
1425 }
1428
1426
1429 /*
1427 /*
1430 * Given a disjoint set of revs, return the subset with the longest
1428 * Given a disjoint set of revs, return the subset with the longest
1431 * path to the root.
1429 * path to the root.
1432 */
1430 */
1433 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1431 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1434 {
1432 {
1435 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1433 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1436 static const Py_ssize_t capacity = 24;
1434 static const Py_ssize_t capacity = 24;
1437 int *depth, *interesting = NULL;
1435 int *depth, *interesting = NULL;
1438 int i, j, v, ninteresting;
1436 int i, j, v, ninteresting;
1439 PyObject *dict = NULL, *keys = NULL;
1437 PyObject *dict = NULL, *keys = NULL;
1440 long *seen = NULL;
1438 long *seen = NULL;
1441 int maxrev = -1;
1439 int maxrev = -1;
1442 long final;
1440 long final;
1443
1441
1444 if (revcount > capacity) {
1442 if (revcount > capacity) {
1445 PyErr_Format(PyExc_OverflowError,
1443 PyErr_Format(PyExc_OverflowError,
1446 "bitset size (%ld) > capacity (%ld)",
1444 "bitset size (%ld) > capacity (%ld)",
1447 (long)revcount, (long)capacity);
1445 (long)revcount, (long)capacity);
1448 return NULL;
1446 return NULL;
1449 }
1447 }
1450
1448
1451 for (i = 0; i < revcount; i++) {
1449 for (i = 0; i < revcount; i++) {
1452 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1450 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1453 if (n > maxrev)
1451 if (n > maxrev)
1454 maxrev = n;
1452 maxrev = n;
1455 }
1453 }
1456
1454
1457 depth = calloc(sizeof(*depth), maxrev + 1);
1455 depth = calloc(sizeof(*depth), maxrev + 1);
1458 if (depth == NULL)
1456 if (depth == NULL)
1459 return PyErr_NoMemory();
1457 return PyErr_NoMemory();
1460
1458
1461 seen = calloc(sizeof(*seen), maxrev + 1);
1459 seen = calloc(sizeof(*seen), maxrev + 1);
1462 if (seen == NULL) {
1460 if (seen == NULL) {
1463 PyErr_NoMemory();
1461 PyErr_NoMemory();
1464 goto bail;
1462 goto bail;
1465 }
1463 }
1466
1464
1467 interesting = calloc(sizeof(*interesting), 1 << revcount);
1465 interesting = calloc(sizeof(*interesting), 1 << revcount);
1468 if (interesting == NULL) {
1466 if (interesting == NULL) {
1469 PyErr_NoMemory();
1467 PyErr_NoMemory();
1470 goto bail;
1468 goto bail;
1471 }
1469 }
1472
1470
1473 if (PyList_Sort(revs) == -1)
1471 if (PyList_Sort(revs) == -1)
1474 goto bail;
1472 goto bail;
1475
1473
1476 for (i = 0; i < revcount; i++) {
1474 for (i = 0; i < revcount; i++) {
1477 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1475 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1478 long b = 1l << i;
1476 long b = 1l << i;
1479 depth[n] = 1;
1477 depth[n] = 1;
1480 seen[n] = b;
1478 seen[n] = b;
1481 interesting[b] = 1;
1479 interesting[b] = 1;
1482 }
1480 }
1483
1481
1484 /* invariant: ninteresting is the number of non-zero entries in
1482 /* invariant: ninteresting is the number of non-zero entries in
1485 * interesting. */
1483 * interesting. */
1486 ninteresting = (int)revcount;
1484 ninteresting = (int)revcount;
1487
1485
1488 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1486 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1489 int dv = depth[v];
1487 int dv = depth[v];
1490 int parents[2];
1488 int parents[2];
1491 long sv;
1489 long sv;
1492
1490
1493 if (dv == 0)
1491 if (dv == 0)
1494 continue;
1492 continue;
1495
1493
1496 sv = seen[v];
1494 sv = seen[v];
1497 if (index_get_parents(self, v, parents, maxrev) < 0)
1495 if (index_get_parents(self, v, parents, maxrev) < 0)
1498 goto bail;
1496 goto bail;
1499
1497
1500 for (i = 0; i < 2; i++) {
1498 for (i = 0; i < 2; i++) {
1501 int p = parents[i];
1499 int p = parents[i];
1502 long sp;
1500 long sp;
1503 int dp;
1501 int dp;
1504
1502
1505 if (p == -1)
1503 if (p == -1)
1506 continue;
1504 continue;
1507
1505
1508 dp = depth[p];
1506 dp = depth[p];
1509 sp = seen[p];
1507 sp = seen[p];
1510 if (dp <= dv) {
1508 if (dp <= dv) {
1511 depth[p] = dv + 1;
1509 depth[p] = dv + 1;
1512 if (sp != sv) {
1510 if (sp != sv) {
1513 interesting[sv] += 1;
1511 interesting[sv] += 1;
1514 seen[p] = sv;
1512 seen[p] = sv;
1515 if (sp) {
1513 if (sp) {
1516 interesting[sp] -= 1;
1514 interesting[sp] -= 1;
1517 if (interesting[sp] == 0)
1515 if (interesting[sp] == 0)
1518 ninteresting -= 1;
1516 ninteresting -= 1;
1519 }
1517 }
1520 }
1518 }
1521 }
1519 }
1522 else if (dv == dp - 1) {
1520 else if (dv == dp - 1) {
1523 long nsp = sp | sv;
1521 long nsp = sp | sv;
1524 if (nsp == sp)
1522 if (nsp == sp)
1525 continue;
1523 continue;
1526 seen[p] = nsp;
1524 seen[p] = nsp;
1527 interesting[sp] -= 1;
1525 interesting[sp] -= 1;
1528 if (interesting[sp] == 0)
1526 if (interesting[sp] == 0)
1529 ninteresting -= 1;
1527 ninteresting -= 1;
1530 if (interesting[nsp] == 0)
1528 if (interesting[nsp] == 0)
1531 ninteresting += 1;
1529 ninteresting += 1;
1532 interesting[nsp] += 1;
1530 interesting[nsp] += 1;
1533 }
1531 }
1534 }
1532 }
1535 interesting[sv] -= 1;
1533 interesting[sv] -= 1;
1536 if (interesting[sv] == 0)
1534 if (interesting[sv] == 0)
1537 ninteresting -= 1;
1535 ninteresting -= 1;
1538 }
1536 }
1539
1537
1540 final = 0;
1538 final = 0;
1541 j = ninteresting;
1539 j = ninteresting;
1542 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1540 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1543 if (interesting[i] == 0)
1541 if (interesting[i] == 0)
1544 continue;
1542 continue;
1545 final |= i;
1543 final |= i;
1546 j -= 1;
1544 j -= 1;
1547 }
1545 }
1548 if (final == 0) {
1546 if (final == 0) {
1549 keys = PyList_New(0);
1547 keys = PyList_New(0);
1550 goto bail;
1548 goto bail;
1551 }
1549 }
1552
1550
1553 dict = PyDict_New();
1551 dict = PyDict_New();
1554 if (dict == NULL)
1552 if (dict == NULL)
1555 goto bail;
1553 goto bail;
1556
1554
1557 for (i = 0; i < revcount; i++) {
1555 for (i = 0; i < revcount; i++) {
1558 PyObject *key;
1556 PyObject *key;
1559
1557
1560 if ((final & (1 << i)) == 0)
1558 if ((final & (1 << i)) == 0)
1561 continue;
1559 continue;
1562
1560
1563 key = PyList_GET_ITEM(revs, i);
1561 key = PyList_GET_ITEM(revs, i);
1564 Py_INCREF(key);
1562 Py_INCREF(key);
1565 Py_INCREF(Py_None);
1563 Py_INCREF(Py_None);
1566 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1564 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1567 Py_DECREF(key);
1565 Py_DECREF(key);
1568 Py_DECREF(Py_None);
1566 Py_DECREF(Py_None);
1569 goto bail;
1567 goto bail;
1570 }
1568 }
1571 }
1569 }
1572
1570
1573 keys = PyDict_Keys(dict);
1571 keys = PyDict_Keys(dict);
1574
1572
1575 bail:
1573 bail:
1576 free(depth);
1574 free(depth);
1577 free(seen);
1575 free(seen);
1578 free(interesting);
1576 free(interesting);
1579 Py_XDECREF(dict);
1577 Py_XDECREF(dict);
1580
1578
1581 return keys;
1579 return keys;
1582 }
1580 }
1583
1581
1584 /*
1582 /*
1585 * Given a (possibly overlapping) set of revs, return all the
1583 * Given a (possibly overlapping) set of revs, return all the
1586 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1584 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1587 */
1585 */
1588 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1586 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1589 {
1587 {
1590 PyObject *ret = NULL;
1588 PyObject *ret = NULL;
1591 Py_ssize_t argcount, i, len;
1589 Py_ssize_t argcount, i, len;
1592 bitmask repeat = 0;
1590 bitmask repeat = 0;
1593 int revcount = 0;
1591 int revcount = 0;
1594 int *revs;
1592 int *revs;
1595
1593
1596 argcount = PySequence_Length(args);
1594 argcount = PySequence_Length(args);
1597 revs = PyMem_Malloc(argcount * sizeof(*revs));
1595 revs = PyMem_Malloc(argcount * sizeof(*revs));
1598 if (argcount > 0 && revs == NULL)
1596 if (argcount > 0 && revs == NULL)
1599 return PyErr_NoMemory();
1597 return PyErr_NoMemory();
1600 len = index_length(self) - 1;
1598 len = index_length(self) - 1;
1601
1599
1602 for (i = 0; i < argcount; i++) {
1600 for (i = 0; i < argcount; i++) {
1603 static const int capacity = 24;
1601 static const int capacity = 24;
1604 PyObject *obj = PySequence_GetItem(args, i);
1602 PyObject *obj = PySequence_GetItem(args, i);
1605 bitmask x;
1603 bitmask x;
1606 long val;
1604 long val;
1607
1605
1608 if (!PyInt_Check(obj)) {
1606 if (!PyInt_Check(obj)) {
1609 PyErr_SetString(PyExc_TypeError,
1607 PyErr_SetString(PyExc_TypeError,
1610 "arguments must all be ints");
1608 "arguments must all be ints");
1611 Py_DECREF(obj);
1609 Py_DECREF(obj);
1612 goto bail;
1610 goto bail;
1613 }
1611 }
1614 val = PyInt_AsLong(obj);
1612 val = PyInt_AsLong(obj);
1615 Py_DECREF(obj);
1613 Py_DECREF(obj);
1616 if (val == -1) {
1614 if (val == -1) {
1617 ret = PyList_New(0);
1615 ret = PyList_New(0);
1618 goto done;
1616 goto done;
1619 }
1617 }
1620 if (val < 0 || val >= len) {
1618 if (val < 0 || val >= len) {
1621 PyErr_SetString(PyExc_IndexError,
1619 PyErr_SetString(PyExc_IndexError,
1622 "index out of range");
1620 "index out of range");
1623 goto bail;
1621 goto bail;
1624 }
1622 }
1625 /* this cheesy bloom filter lets us avoid some more
1623 /* this cheesy bloom filter lets us avoid some more
1626 * expensive duplicate checks in the common set-is-disjoint
1624 * expensive duplicate checks in the common set-is-disjoint
1627 * case */
1625 * case */
1628 x = 1ull << (val & 0x3f);
1626 x = 1ull << (val & 0x3f);
1629 if (repeat & x) {
1627 if (repeat & x) {
1630 int k;
1628 int k;
1631 for (k = 0; k < revcount; k++) {
1629 for (k = 0; k < revcount; k++) {
1632 if (val == revs[k])
1630 if (val == revs[k])
1633 goto duplicate;
1631 goto duplicate;
1634 }
1632 }
1635 }
1633 }
1636 else repeat |= x;
1634 else repeat |= x;
1637 if (revcount >= capacity) {
1635 if (revcount >= capacity) {
1638 PyErr_Format(PyExc_OverflowError,
1636 PyErr_Format(PyExc_OverflowError,
1639 "bitset size (%d) > capacity (%d)",
1637 "bitset size (%d) > capacity (%d)",
1640 revcount, capacity);
1638 revcount, capacity);
1641 goto bail;
1639 goto bail;
1642 }
1640 }
1643 revs[revcount++] = (int)val;
1641 revs[revcount++] = (int)val;
1644 duplicate:;
1642 duplicate:;
1645 }
1643 }
1646
1644
1647 if (revcount == 0) {
1645 if (revcount == 0) {
1648 ret = PyList_New(0);
1646 ret = PyList_New(0);
1649 goto done;
1647 goto done;
1650 }
1648 }
1651 if (revcount == 1) {
1649 if (revcount == 1) {
1652 PyObject *obj;
1650 PyObject *obj;
1653 ret = PyList_New(1);
1651 ret = PyList_New(1);
1654 if (ret == NULL)
1652 if (ret == NULL)
1655 goto bail;
1653 goto bail;
1656 obj = PyInt_FromLong(revs[0]);
1654 obj = PyInt_FromLong(revs[0]);
1657 if (obj == NULL)
1655 if (obj == NULL)
1658 goto bail;
1656 goto bail;
1659 PyList_SET_ITEM(ret, 0, obj);
1657 PyList_SET_ITEM(ret, 0, obj);
1660 goto done;
1658 goto done;
1661 }
1659 }
1662
1660
1663 ret = find_gca_candidates(self, revs, revcount);
1661 ret = find_gca_candidates(self, revs, revcount);
1664 if (ret == NULL)
1662 if (ret == NULL)
1665 goto bail;
1663 goto bail;
1666
1664
1667 done:
1665 done:
1668 PyMem_Free(revs);
1666 PyMem_Free(revs);
1669 return ret;
1667 return ret;
1670
1668
1671 bail:
1669 bail:
1672 PyMem_Free(revs);
1670 PyMem_Free(revs);
1673 Py_XDECREF(ret);
1671 Py_XDECREF(ret);
1674 return NULL;
1672 return NULL;
1675 }
1673 }
1676
1674
1677 /*
1675 /*
1678 * Given a (possibly overlapping) set of revs, return the greatest
1676 * Given a (possibly overlapping) set of revs, return the greatest
1679 * common ancestors: those with the longest path to the root.
1677 * common ancestors: those with the longest path to the root.
1680 */
1678 */
1681 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1679 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1682 {
1680 {
1683 PyObject *ret;
1681 PyObject *ret;
1684 PyObject *gca = index_commonancestorsheads(self, args);
1682 PyObject *gca = index_commonancestorsheads(self, args);
1685 if (gca == NULL)
1683 if (gca == NULL)
1686 return NULL;
1684 return NULL;
1687
1685
1688 if (PyList_GET_SIZE(gca) <= 1) {
1686 if (PyList_GET_SIZE(gca) <= 1) {
1689 return gca;
1687 return gca;
1690 }
1688 }
1691
1689
1692 ret = find_deepest(self, gca);
1690 ret = find_deepest(self, gca);
1693 Py_DECREF(gca);
1691 Py_DECREF(gca);
1694 return ret;
1692 return ret;
1695 }
1693 }
1696
1694
1697 /*
1695 /*
1698 * Invalidate any trie entries introduced by added revs.
1696 * Invalidate any trie entries introduced by added revs.
1699 */
1697 */
1700 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1698 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1701 {
1699 {
1702 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1700 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1703
1701
1704 for (i = start; i < len; i++) {
1702 for (i = start; i < len; i++) {
1705 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1703 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1706 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1704 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1707
1705
1708 nt_insert(self, PyBytes_AS_STRING(node), -1);
1706 nt_insert(self, PyBytes_AS_STRING(node), -1);
1709 }
1707 }
1710
1708
1711 if (start == 0)
1709 if (start == 0)
1712 Py_CLEAR(self->added);
1710 Py_CLEAR(self->added);
1713 }
1711 }
1714
1712
1715 /*
1713 /*
1716 * Delete a numeric range of revs, which must be at the end of the
1714 * Delete a numeric range of revs, which must be at the end of the
1717 * range, but exclude the sentinel nullid entry.
1715 * range, but exclude the sentinel nullid entry.
1718 */
1716 */
1719 static int index_slice_del(indexObject *self, PyObject *item)
1717 static int index_slice_del(indexObject *self, PyObject *item)
1720 {
1718 {
1721 Py_ssize_t start, stop, step, slicelength;
1719 Py_ssize_t start, stop, step, slicelength;
1722 Py_ssize_t length = index_length(self);
1720 Py_ssize_t length = index_length(self);
1723 int ret = 0;
1721 int ret = 0;
1724
1722
1725 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1723 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1726 #ifdef IS_PY3K
1724 #ifdef IS_PY3K
1727 if (PySlice_GetIndicesEx(item, length,
1725 if (PySlice_GetIndicesEx(item, length,
1728 #else
1726 #else
1729 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1727 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1730 #endif
1728 #endif
1731 &start, &stop, &step, &slicelength) < 0)
1729 &start, &stop, &step, &slicelength) < 0)
1732 return -1;
1730 return -1;
1733
1731
1734 if (slicelength <= 0)
1732 if (slicelength <= 0)
1735 return 0;
1733 return 0;
1736
1734
1737 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1735 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1738 stop = start;
1736 stop = start;
1739
1737
1740 if (step < 0) {
1738 if (step < 0) {
1741 stop = start + 1;
1739 stop = start + 1;
1742 start = stop + step*(slicelength - 1) - 1;
1740 start = stop + step*(slicelength - 1) - 1;
1743 step = -step;
1741 step = -step;
1744 }
1742 }
1745
1743
1746 if (step != 1) {
1744 if (step != 1) {
1747 PyErr_SetString(PyExc_ValueError,
1745 PyErr_SetString(PyExc_ValueError,
1748 "revlog index delete requires step size of 1");
1746 "revlog index delete requires step size of 1");
1749 return -1;
1747 return -1;
1750 }
1748 }
1751
1749
1752 if (stop != length - 1) {
1750 if (stop != length - 1) {
1753 PyErr_SetString(PyExc_IndexError,
1751 PyErr_SetString(PyExc_IndexError,
1754 "revlog index deletion indices are invalid");
1752 "revlog index deletion indices are invalid");
1755 return -1;
1753 return -1;
1756 }
1754 }
1757
1755
1758 if (start < self->length - 1) {
1756 if (start < self->length - 1) {
1759 if (self->nt) {
1757 if (self->nt) {
1760 Py_ssize_t i;
1758 Py_ssize_t i;
1761
1759
1762 for (i = start + 1; i < self->length - 1; i++) {
1760 for (i = start + 1; i < self->length - 1; i++) {
1763 const char *node = index_node(self, i);
1761 const char *node = index_node(self, i);
1764
1762
1765 if (node)
1763 if (node)
1766 nt_insert(self, node, -1);
1764 nt_insert(self, node, -1);
1767 }
1765 }
1768 if (self->added)
1766 if (self->added)
1769 nt_invalidate_added(self, 0);
1767 nt_invalidate_added(self, 0);
1770 if (self->ntrev > start)
1768 if (self->ntrev > start)
1771 self->ntrev = (int)start;
1769 self->ntrev = (int)start;
1772 }
1770 }
1773 self->length = start + 1;
1771 self->length = start + 1;
1774 if (start < self->raw_length) {
1772 if (start < self->raw_length) {
1775 if (self->cache) {
1773 if (self->cache) {
1776 Py_ssize_t i;
1774 Py_ssize_t i;
1777 for (i = start; i < self->raw_length; i++)
1775 for (i = start; i < self->raw_length; i++)
1778 Py_CLEAR(self->cache[i]);
1776 Py_CLEAR(self->cache[i]);
1779 }
1777 }
1780 self->raw_length = start;
1778 self->raw_length = start;
1781 }
1779 }
1782 goto done;
1780 goto done;
1783 }
1781 }
1784
1782
1785 if (self->nt) {
1783 if (self->nt) {
1786 nt_invalidate_added(self, start - self->length + 1);
1784 nt_invalidate_added(self, start - self->length + 1);
1787 if (self->ntrev > start)
1785 if (self->ntrev > start)
1788 self->ntrev = (int)start;
1786 self->ntrev = (int)start;
1789 }
1787 }
1790 if (self->added)
1788 if (self->added)
1791 ret = PyList_SetSlice(self->added, start - self->length + 1,
1789 ret = PyList_SetSlice(self->added, start - self->length + 1,
1792 PyList_GET_SIZE(self->added), NULL);
1790 PyList_GET_SIZE(self->added), NULL);
1793 done:
1791 done:
1794 Py_CLEAR(self->headrevs);
1792 Py_CLEAR(self->headrevs);
1795 return ret;
1793 return ret;
1796 }
1794 }
1797
1795
1798 /*
1796 /*
1799 * Supported ops:
1797 * Supported ops:
1800 *
1798 *
1801 * slice deletion
1799 * slice deletion
1802 * string assignment (extend node->rev mapping)
1800 * string assignment (extend node->rev mapping)
1803 * string deletion (shrink node->rev mapping)
1801 * string deletion (shrink node->rev mapping)
1804 */
1802 */
1805 static int index_assign_subscript(indexObject *self, PyObject *item,
1803 static int index_assign_subscript(indexObject *self, PyObject *item,
1806 PyObject *value)
1804 PyObject *value)
1807 {
1805 {
1808 char *node;
1806 char *node;
1809 Py_ssize_t nodelen;
1807 Py_ssize_t nodelen;
1810 long rev;
1808 long rev;
1811
1809
1812 if (PySlice_Check(item) && value == NULL)
1810 if (PySlice_Check(item) && value == NULL)
1813 return index_slice_del(self, item);
1811 return index_slice_del(self, item);
1814
1812
1815 if (node_check(item, &node, &nodelen) == -1)
1813 if (node_check(item, &node, &nodelen) == -1)
1816 return -1;
1814 return -1;
1817
1815
1818 if (value == NULL)
1816 if (value == NULL)
1819 return self->nt ? nt_insert(self, node, -1) : 0;
1817 return self->nt ? nt_insert(self, node, -1) : 0;
1820 rev = PyInt_AsLong(value);
1818 rev = PyInt_AsLong(value);
1821 if (rev > INT_MAX || rev < 0) {
1819 if (rev > INT_MAX || rev < 0) {
1822 if (!PyErr_Occurred())
1820 if (!PyErr_Occurred())
1823 PyErr_SetString(PyExc_ValueError, "rev out of range");
1821 PyErr_SetString(PyExc_ValueError, "rev out of range");
1824 return -1;
1822 return -1;
1825 }
1823 }
1826
1824
1827 if (nt_init(self) == -1)
1825 if (nt_init(self) == -1)
1828 return -1;
1826 return -1;
1829 return nt_insert(self, node, (int)rev);
1827 return nt_insert(self, node, (int)rev);
1830 }
1828 }
1831
1829
1832 /*
1830 /*
1833 * Find all RevlogNG entries in an index that has inline data. Update
1831 * Find all RevlogNG entries in an index that has inline data. Update
1834 * the optional "offsets" table with those entries.
1832 * the optional "offsets" table with those entries.
1835 */
1833 */
1836 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1834 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1837 {
1835 {
1838 const char *data = (const char *)self->buf.buf;
1836 const char *data = (const char *)self->buf.buf;
1839 Py_ssize_t pos = 0;
1837 Py_ssize_t pos = 0;
1840 Py_ssize_t end = self->buf.len;
1838 Py_ssize_t end = self->buf.len;
1841 long incr = v1_hdrsize;
1839 long incr = v1_hdrsize;
1842 Py_ssize_t len = 0;
1840 Py_ssize_t len = 0;
1843
1841
1844 while (pos + v1_hdrsize <= end && pos >= 0) {
1842 while (pos + v1_hdrsize <= end && pos >= 0) {
1845 uint32_t comp_len;
1843 uint32_t comp_len;
1846 /* 3rd element of header is length of compressed inline data */
1844 /* 3rd element of header is length of compressed inline data */
1847 comp_len = getbe32(data + pos + 8);
1845 comp_len = getbe32(data + pos + 8);
1848 incr = v1_hdrsize + comp_len;
1846 incr = v1_hdrsize + comp_len;
1849 if (offsets)
1847 if (offsets)
1850 offsets[len] = data + pos;
1848 offsets[len] = data + pos;
1851 len++;
1849 len++;
1852 pos += incr;
1850 pos += incr;
1853 }
1851 }
1854
1852
1855 if (pos != end) {
1853 if (pos != end) {
1856 if (!PyErr_Occurred())
1854 if (!PyErr_Occurred())
1857 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1855 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1858 return -1;
1856 return -1;
1859 }
1857 }
1860
1858
1861 return len;
1859 return len;
1862 }
1860 }
1863
1861
1864 static int index_init(indexObject *self, PyObject *args)
1862 static int index_init(indexObject *self, PyObject *args)
1865 {
1863 {
1866 PyObject *data_obj, *inlined_obj;
1864 PyObject *data_obj, *inlined_obj;
1867 Py_ssize_t size;
1865 Py_ssize_t size;
1868
1866
1869 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1867 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1870 self->raw_length = 0;
1868 self->raw_length = 0;
1871 self->added = NULL;
1869 self->added = NULL;
1872 self->cache = NULL;
1870 self->cache = NULL;
1873 self->data = NULL;
1871 self->data = NULL;
1874 memset(&self->buf, 0, sizeof(self->buf));
1872 memset(&self->buf, 0, sizeof(self->buf));
1875 self->headrevs = NULL;
1873 self->headrevs = NULL;
1876 self->filteredrevs = Py_None;
1874 self->filteredrevs = Py_None;
1877 Py_INCREF(Py_None);
1875 Py_INCREF(Py_None);
1878 self->nt = NULL;
1876 self->nt = NULL;
1879 self->offsets = NULL;
1877 self->offsets = NULL;
1880
1878
1881 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1879 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1882 return -1;
1880 return -1;
1883 if (!PyObject_CheckBuffer(data_obj)) {
1881 if (!PyObject_CheckBuffer(data_obj)) {
1884 PyErr_SetString(PyExc_TypeError,
1882 PyErr_SetString(PyExc_TypeError,
1885 "data does not support buffer interface");
1883 "data does not support buffer interface");
1886 return -1;
1884 return -1;
1887 }
1885 }
1888
1886
1889 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1887 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1890 return -1;
1888 return -1;
1891 size = self->buf.len;
1889 size = self->buf.len;
1892
1890
1893 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1891 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1894 self->data = data_obj;
1892 self->data = data_obj;
1895
1893
1896 self->ntlength = self->ntcapacity = 0;
1894 self->ntlength = self->ntcapacity = 0;
1897 self->ntdepth = self->ntsplits = 0;
1895 self->ntdepth = self->ntsplits = 0;
1898 self->ntlookups = self->ntmisses = 0;
1896 self->ntlookups = self->ntmisses = 0;
1899 self->ntrev = -1;
1897 self->ntrev = -1;
1900 Py_INCREF(self->data);
1898 Py_INCREF(self->data);
1901
1899
1902 if (self->inlined) {
1900 if (self->inlined) {
1903 Py_ssize_t len = inline_scan(self, NULL);
1901 Py_ssize_t len = inline_scan(self, NULL);
1904 if (len == -1)
1902 if (len == -1)
1905 goto bail;
1903 goto bail;
1906 self->raw_length = len;
1904 self->raw_length = len;
1907 self->length = len + 1;
1905 self->length = len + 1;
1908 } else {
1906 } else {
1909 if (size % v1_hdrsize) {
1907 if (size % v1_hdrsize) {
1910 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1908 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1911 goto bail;
1909 goto bail;
1912 }
1910 }
1913 self->raw_length = size / v1_hdrsize;
1911 self->raw_length = size / v1_hdrsize;
1914 self->length = self->raw_length + 1;
1912 self->length = self->raw_length + 1;
1915 }
1913 }
1916
1914
1917 return 0;
1915 return 0;
1918 bail:
1916 bail:
1919 return -1;
1917 return -1;
1920 }
1918 }
1921
1919
1922 static PyObject *index_nodemap(indexObject *self)
1920 static PyObject *index_nodemap(indexObject *self)
1923 {
1921 {
1924 Py_INCREF(self);
1922 Py_INCREF(self);
1925 return (PyObject *)self;
1923 return (PyObject *)self;
1926 }
1924 }
1927
1925
1928 static void index_dealloc(indexObject *self)
1926 static void index_dealloc(indexObject *self)
1929 {
1927 {
1930 _index_clearcaches(self);
1928 _index_clearcaches(self);
1931 Py_XDECREF(self->filteredrevs);
1929 Py_XDECREF(self->filteredrevs);
1932 if (self->buf.buf) {
1930 if (self->buf.buf) {
1933 PyBuffer_Release(&self->buf);
1931 PyBuffer_Release(&self->buf);
1934 memset(&self->buf, 0, sizeof(self->buf));
1932 memset(&self->buf, 0, sizeof(self->buf));
1935 }
1933 }
1936 Py_XDECREF(self->data);
1934 Py_XDECREF(self->data);
1937 Py_XDECREF(self->added);
1935 Py_XDECREF(self->added);
1938 PyObject_Del(self);
1936 PyObject_Del(self);
1939 }
1937 }
1940
1938
1941 static PySequenceMethods index_sequence_methods = {
1939 static PySequenceMethods index_sequence_methods = {
1942 (lenfunc)index_length, /* sq_length */
1940 (lenfunc)index_length, /* sq_length */
1943 0, /* sq_concat */
1941 0, /* sq_concat */
1944 0, /* sq_repeat */
1942 0, /* sq_repeat */
1945 (ssizeargfunc)index_get, /* sq_item */
1943 (ssizeargfunc)index_get, /* sq_item */
1946 0, /* sq_slice */
1944 0, /* sq_slice */
1947 0, /* sq_ass_item */
1945 0, /* sq_ass_item */
1948 0, /* sq_ass_slice */
1946 0, /* sq_ass_slice */
1949 (objobjproc)index_contains, /* sq_contains */
1947 (objobjproc)index_contains, /* sq_contains */
1950 };
1948 };
1951
1949
1952 static PyMappingMethods index_mapping_methods = {
1950 static PyMappingMethods index_mapping_methods = {
1953 (lenfunc)index_length, /* mp_length */
1951 (lenfunc)index_length, /* mp_length */
1954 (binaryfunc)index_getitem, /* mp_subscript */
1952 (binaryfunc)index_getitem, /* mp_subscript */
1955 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1953 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1956 };
1954 };
1957
1955
1958 static PyMethodDef index_methods[] = {
1956 static PyMethodDef index_methods[] = {
1959 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1957 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1960 "return the gca set of the given revs"},
1958 "return the gca set of the given revs"},
1961 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1959 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1962 METH_VARARGS,
1960 METH_VARARGS,
1963 "return the heads of the common ancestors of the given revs"},
1961 "return the heads of the common ancestors of the given revs"},
1964 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1962 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1965 "clear the index caches"},
1963 "clear the index caches"},
1966 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1964 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1967 "get an index entry"},
1965 "get an index entry"},
1968 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
1966 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
1969 METH_VARARGS, "compute phases"},
1967 METH_VARARGS, "compute phases"},
1970 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
1968 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
1971 "reachableroots"},
1969 "reachableroots"},
1972 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
1970 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
1973 "get head revisions"}, /* Can do filtering since 3.2 */
1971 "get head revisions"}, /* Can do filtering since 3.2 */
1974 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
1972 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
1975 "get filtered head revisions"}, /* Can always do filtering */
1973 "get filtered head revisions"}, /* Can always do filtering */
1976 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
1974 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
1977 "determine revisions with deltas to reconstruct fulltext"},
1975 "determine revisions with deltas to reconstruct fulltext"},
1978 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1976 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1979 "insert an index entry"},
1977 "insert an index entry"},
1980 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1978 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1981 "match a potentially ambiguous node ID"},
1979 "match a potentially ambiguous node ID"},
1982 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1980 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1983 "stats for the index"},
1981 "stats for the index"},
1984 {NULL} /* Sentinel */
1982 {NULL} /* Sentinel */
1985 };
1983 };
1986
1984
1987 static PyGetSetDef index_getset[] = {
1985 static PyGetSetDef index_getset[] = {
1988 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1986 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1989 {NULL} /* Sentinel */
1987 {NULL} /* Sentinel */
1990 };
1988 };
1991
1989
1992 static PyTypeObject indexType = {
1990 static PyTypeObject indexType = {
1993 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1991 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1994 "parsers.index", /* tp_name */
1992 "parsers.index", /* tp_name */
1995 sizeof(indexObject), /* tp_basicsize */
1993 sizeof(indexObject), /* tp_basicsize */
1996 0, /* tp_itemsize */
1994 0, /* tp_itemsize */
1997 (destructor)index_dealloc, /* tp_dealloc */
1995 (destructor)index_dealloc, /* tp_dealloc */
1998 0, /* tp_print */
1996 0, /* tp_print */
1999 0, /* tp_getattr */
1997 0, /* tp_getattr */
2000 0, /* tp_setattr */
1998 0, /* tp_setattr */
2001 0, /* tp_compare */
1999 0, /* tp_compare */
2002 0, /* tp_repr */
2000 0, /* tp_repr */
2003 0, /* tp_as_number */
2001 0, /* tp_as_number */
2004 &index_sequence_methods, /* tp_as_sequence */
2002 &index_sequence_methods, /* tp_as_sequence */
2005 &index_mapping_methods, /* tp_as_mapping */
2003 &index_mapping_methods, /* tp_as_mapping */
2006 0, /* tp_hash */
2004 0, /* tp_hash */
2007 0, /* tp_call */
2005 0, /* tp_call */
2008 0, /* tp_str */
2006 0, /* tp_str */
2009 0, /* tp_getattro */
2007 0, /* tp_getattro */
2010 0, /* tp_setattro */
2008 0, /* tp_setattro */
2011 0, /* tp_as_buffer */
2009 0, /* tp_as_buffer */
2012 Py_TPFLAGS_DEFAULT, /* tp_flags */
2010 Py_TPFLAGS_DEFAULT, /* tp_flags */
2013 "revlog index", /* tp_doc */
2011 "revlog index", /* tp_doc */
2014 0, /* tp_traverse */
2012 0, /* tp_traverse */
2015 0, /* tp_clear */
2013 0, /* tp_clear */
2016 0, /* tp_richcompare */
2014 0, /* tp_richcompare */
2017 0, /* tp_weaklistoffset */
2015 0, /* tp_weaklistoffset */
2018 0, /* tp_iter */
2016 0, /* tp_iter */
2019 0, /* tp_iternext */
2017 0, /* tp_iternext */
2020 index_methods, /* tp_methods */
2018 index_methods, /* tp_methods */
2021 0, /* tp_members */
2019 0, /* tp_members */
2022 index_getset, /* tp_getset */
2020 index_getset, /* tp_getset */
2023 0, /* tp_base */
2021 0, /* tp_base */
2024 0, /* tp_dict */
2022 0, /* tp_dict */
2025 0, /* tp_descr_get */
2023 0, /* tp_descr_get */
2026 0, /* tp_descr_set */
2024 0, /* tp_descr_set */
2027 0, /* tp_dictoffset */
2025 0, /* tp_dictoffset */
2028 (initproc)index_init, /* tp_init */
2026 (initproc)index_init, /* tp_init */
2029 0, /* tp_alloc */
2027 0, /* tp_alloc */
2030 };
2028 };
2031
2029
2032 /*
2030 /*
2033 * returns a tuple of the form (index, index, cache) with elements as
2031 * returns a tuple of the form (index, index, cache) with elements as
2034 * follows:
2032 * follows:
2035 *
2033 *
2036 * index: an index object that lazily parses RevlogNG records
2034 * index: an index object that lazily parses RevlogNG records
2037 * cache: if data is inlined, a tuple (0, index_file_content), else None
2035 * cache: if data is inlined, a tuple (0, index_file_content), else None
2038 * index_file_content could be a string, or a buffer
2036 * index_file_content could be a string, or a buffer
2039 *
2037 *
2040 * added complications are for backwards compatibility
2038 * added complications are for backwards compatibility
2041 */
2039 */
2042 PyObject *parse_index2(PyObject *self, PyObject *args)
2040 PyObject *parse_index2(PyObject *self, PyObject *args)
2043 {
2041 {
2044 PyObject *tuple = NULL, *cache = NULL;
2042 PyObject *tuple = NULL, *cache = NULL;
2045 indexObject *idx;
2043 indexObject *idx;
2046 int ret;
2044 int ret;
2047
2045
2048 idx = PyObject_New(indexObject, &indexType);
2046 idx = PyObject_New(indexObject, &indexType);
2049 if (idx == NULL)
2047 if (idx == NULL)
2050 goto bail;
2048 goto bail;
2051
2049
2052 ret = index_init(idx, args);
2050 ret = index_init(idx, args);
2053 if (ret == -1)
2051 if (ret == -1)
2054 goto bail;
2052 goto bail;
2055
2053
2056 if (idx->inlined) {
2054 if (idx->inlined) {
2057 cache = Py_BuildValue("iO", 0, idx->data);
2055 cache = Py_BuildValue("iO", 0, idx->data);
2058 if (cache == NULL)
2056 if (cache == NULL)
2059 goto bail;
2057 goto bail;
2060 } else {
2058 } else {
2061 cache = Py_None;
2059 cache = Py_None;
2062 Py_INCREF(cache);
2060 Py_INCREF(cache);
2063 }
2061 }
2064
2062
2065 tuple = Py_BuildValue("NN", idx, cache);
2063 tuple = Py_BuildValue("NN", idx, cache);
2066 if (!tuple)
2064 if (!tuple)
2067 goto bail;
2065 goto bail;
2068 return tuple;
2066 return tuple;
2069
2067
2070 bail:
2068 bail:
2071 Py_XDECREF(idx);
2069 Py_XDECREF(idx);
2072 Py_XDECREF(cache);
2070 Py_XDECREF(cache);
2073 Py_XDECREF(tuple);
2071 Py_XDECREF(tuple);
2074 return NULL;
2072 return NULL;
2075 }
2073 }
2076
2074
2077 void revlog_module_init(PyObject *mod)
2075 void revlog_module_init(PyObject *mod)
2078 {
2076 {
2079 indexType.tp_new = PyType_GenericNew;
2077 indexType.tp_new = PyType_GenericNew;
2080 if (PyType_Ready(&indexType) < 0)
2078 if (PyType_Ready(&indexType) < 0)
2081 return;
2079 return;
2082 Py_INCREF(&indexType);
2080 Py_INCREF(&indexType);
2083 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2081 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2084
2082
2085 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2083 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2086 -1, -1, -1, -1, nullid, 20);
2084 -1, -1, -1, -1, nullid, 20);
2087 if (nullentry)
2085 if (nullentry)
2088 PyObject_GC_UnTrack(nullentry);
2086 PyObject_GC_UnTrack(nullentry);
2089 }
2087 }
General Comments 0
You need to be logged in to leave comments. Login now