##// END OF EJS Templates
parsers: use a lookup table to convert hex to binary...
Siddharth Agarwal -
r19718:d69e0672 default
parent child Browse files
Show More
@@ -1,1937 +1,1953 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <stddef.h>
12 #include <stddef.h>
13 #include <string.h>
13 #include <string.h>
14
14
15 #include "util.h"
15 #include "util.h"
16
16
17 static int8_t hextable[256] = {
18 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
22 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
23 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
34 };
35
17 static inline int hexdigit(const char *p, Py_ssize_t off)
36 static inline int hexdigit(const char *p, Py_ssize_t off)
18 {
37 {
19 char c = p[off];
38 int8_t val = hextable[(unsigned char)p[off]];
20
39
21 if (c >= '0' && c <= '9')
40 if (val >= 0) {
22 return c - '0';
41 return val;
23 if (c >= 'a' && c <= 'f')
42 }
24 return c - 'a' + 10;
25 if (c >= 'A' && c <= 'F')
26 return c - 'A' + 10;
27
43
28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
44 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 return 0;
45 return 0;
30 }
46 }
31
47
32 /*
48 /*
33 * Turn a hex-encoded string into binary.
49 * Turn a hex-encoded string into binary.
34 */
50 */
35 static PyObject *unhexlify(const char *str, int len)
51 static PyObject *unhexlify(const char *str, int len)
36 {
52 {
37 PyObject *ret;
53 PyObject *ret;
38 char *d;
54 char *d;
39 int i;
55 int i;
40
56
41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
57 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42
58
43 if (!ret)
59 if (!ret)
44 return NULL;
60 return NULL;
45
61
46 d = PyBytes_AsString(ret);
62 d = PyBytes_AsString(ret);
47
63
48 for (i = 0; i < len;) {
64 for (i = 0; i < len;) {
49 int hi = hexdigit(str, i++);
65 int hi = hexdigit(str, i++);
50 int lo = hexdigit(str, i++);
66 int lo = hexdigit(str, i++);
51 *d++ = (hi << 4) | lo;
67 *d++ = (hi << 4) | lo;
52 }
68 }
53
69
54 return ret;
70 return ret;
55 }
71 }
56
72
57 /*
73 /*
58 * This code assumes that a manifest is stitched together with newline
74 * This code assumes that a manifest is stitched together with newline
59 * ('\n') characters.
75 * ('\n') characters.
60 */
76 */
61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
77 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 {
78 {
63 PyObject *mfdict, *fdict;
79 PyObject *mfdict, *fdict;
64 char *str, *cur, *start, *zero;
80 char *str, *cur, *start, *zero;
65 int len;
81 int len;
66
82
67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
83 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 &PyDict_Type, &mfdict,
84 &PyDict_Type, &mfdict,
69 &PyDict_Type, &fdict,
85 &PyDict_Type, &fdict,
70 &str, &len))
86 &str, &len))
71 goto quit;
87 goto quit;
72
88
73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
89 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 PyObject *file = NULL, *node = NULL;
90 PyObject *file = NULL, *node = NULL;
75 PyObject *flags = NULL;
91 PyObject *flags = NULL;
76 ptrdiff_t nlen;
92 ptrdiff_t nlen;
77
93
78 if (!*cur) {
94 if (!*cur) {
79 zero = cur;
95 zero = cur;
80 continue;
96 continue;
81 }
97 }
82 else if (*cur != '\n')
98 else if (*cur != '\n')
83 continue;
99 continue;
84
100
85 if (!zero) {
101 if (!zero) {
86 PyErr_SetString(PyExc_ValueError,
102 PyErr_SetString(PyExc_ValueError,
87 "manifest entry has no separator");
103 "manifest entry has no separator");
88 goto quit;
104 goto quit;
89 }
105 }
90
106
91 file = PyBytes_FromStringAndSize(start, zero - start);
107 file = PyBytes_FromStringAndSize(start, zero - start);
92
108
93 if (!file)
109 if (!file)
94 goto bail;
110 goto bail;
95
111
96 nlen = cur - zero - 1;
112 nlen = cur - zero - 1;
97
113
98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
114 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 if (!node)
115 if (!node)
100 goto bail;
116 goto bail;
101
117
102 if (nlen > 40) {
118 if (nlen > 40) {
103 flags = PyBytes_FromStringAndSize(zero + 41,
119 flags = PyBytes_FromStringAndSize(zero + 41,
104 nlen - 40);
120 nlen - 40);
105 if (!flags)
121 if (!flags)
106 goto bail;
122 goto bail;
107
123
108 if (PyDict_SetItem(fdict, file, flags) == -1)
124 if (PyDict_SetItem(fdict, file, flags) == -1)
109 goto bail;
125 goto bail;
110 }
126 }
111
127
112 if (PyDict_SetItem(mfdict, file, node) == -1)
128 if (PyDict_SetItem(mfdict, file, node) == -1)
113 goto bail;
129 goto bail;
114
130
115 start = cur + 1;
131 start = cur + 1;
116 zero = NULL;
132 zero = NULL;
117
133
118 Py_XDECREF(flags);
134 Py_XDECREF(flags);
119 Py_XDECREF(node);
135 Py_XDECREF(node);
120 Py_XDECREF(file);
136 Py_XDECREF(file);
121 continue;
137 continue;
122 bail:
138 bail:
123 Py_XDECREF(flags);
139 Py_XDECREF(flags);
124 Py_XDECREF(node);
140 Py_XDECREF(node);
125 Py_XDECREF(file);
141 Py_XDECREF(file);
126 goto quit;
142 goto quit;
127 }
143 }
128
144
129 if (len > 0 && *(cur - 1) != '\n') {
145 if (len > 0 && *(cur - 1) != '\n') {
130 PyErr_SetString(PyExc_ValueError,
146 PyErr_SetString(PyExc_ValueError,
131 "manifest contains trailing garbage");
147 "manifest contains trailing garbage");
132 goto quit;
148 goto quit;
133 }
149 }
134
150
135 Py_INCREF(Py_None);
151 Py_INCREF(Py_None);
136 return Py_None;
152 return Py_None;
137 quit:
153 quit:
138 return NULL;
154 return NULL;
139 }
155 }
140
156
141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
157 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
142 {
158 {
143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
159 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
160 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
145 char *str, *cur, *end, *cpos;
161 char *str, *cur, *end, *cpos;
146 int state, mode, size, mtime;
162 int state, mode, size, mtime;
147 unsigned int flen;
163 unsigned int flen;
148 int len;
164 int len;
149
165
150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
166 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
151 &PyDict_Type, &dmap,
167 &PyDict_Type, &dmap,
152 &PyDict_Type, &cmap,
168 &PyDict_Type, &cmap,
153 &str, &len))
169 &str, &len))
154 goto quit;
170 goto quit;
155
171
156 /* read parents */
172 /* read parents */
157 if (len < 40)
173 if (len < 40)
158 goto quit;
174 goto quit;
159
175
160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
176 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
161 if (!parents)
177 if (!parents)
162 goto quit;
178 goto quit;
163
179
164 /* read filenames */
180 /* read filenames */
165 cur = str + 40;
181 cur = str + 40;
166 end = str + len;
182 end = str + len;
167
183
168 while (cur < end - 17) {
184 while (cur < end - 17) {
169 /* unpack header */
185 /* unpack header */
170 state = *cur;
186 state = *cur;
171 mode = getbe32(cur + 1);
187 mode = getbe32(cur + 1);
172 size = getbe32(cur + 5);
188 size = getbe32(cur + 5);
173 mtime = getbe32(cur + 9);
189 mtime = getbe32(cur + 9);
174 flen = getbe32(cur + 13);
190 flen = getbe32(cur + 13);
175 cur += 17;
191 cur += 17;
176 if (cur + flen > end || cur + flen < cur) {
192 if (cur + flen > end || cur + flen < cur) {
177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
193 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
178 goto quit;
194 goto quit;
179 }
195 }
180
196
181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
197 entry = Py_BuildValue("ciii", state, mode, size, mtime);
182 if (!entry)
198 if (!entry)
183 goto quit;
199 goto quit;
184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
200 PyObject_GC_UnTrack(entry); /* don't waste time with this */
185
201
186 cpos = memchr(cur, 0, flen);
202 cpos = memchr(cur, 0, flen);
187 if (cpos) {
203 if (cpos) {
188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
204 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
189 cname = PyBytes_FromStringAndSize(cpos + 1,
205 cname = PyBytes_FromStringAndSize(cpos + 1,
190 flen - (cpos - cur) - 1);
206 flen - (cpos - cur) - 1);
191 if (!fname || !cname ||
207 if (!fname || !cname ||
192 PyDict_SetItem(cmap, fname, cname) == -1 ||
208 PyDict_SetItem(cmap, fname, cname) == -1 ||
193 PyDict_SetItem(dmap, fname, entry) == -1)
209 PyDict_SetItem(dmap, fname, entry) == -1)
194 goto quit;
210 goto quit;
195 Py_DECREF(cname);
211 Py_DECREF(cname);
196 } else {
212 } else {
197 fname = PyBytes_FromStringAndSize(cur, flen);
213 fname = PyBytes_FromStringAndSize(cur, flen);
198 if (!fname ||
214 if (!fname ||
199 PyDict_SetItem(dmap, fname, entry) == -1)
215 PyDict_SetItem(dmap, fname, entry) == -1)
200 goto quit;
216 goto quit;
201 }
217 }
202 cur += flen;
218 cur += flen;
203 Py_DECREF(fname);
219 Py_DECREF(fname);
204 Py_DECREF(entry);
220 Py_DECREF(entry);
205 fname = cname = entry = NULL;
221 fname = cname = entry = NULL;
206 }
222 }
207
223
208 ret = parents;
224 ret = parents;
209 Py_INCREF(ret);
225 Py_INCREF(ret);
210 quit:
226 quit:
211 Py_XDECREF(fname);
227 Py_XDECREF(fname);
212 Py_XDECREF(cname);
228 Py_XDECREF(cname);
213 Py_XDECREF(entry);
229 Py_XDECREF(entry);
214 Py_XDECREF(parents);
230 Py_XDECREF(parents);
215 return ret;
231 return ret;
216 }
232 }
217
233
218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
234 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 {
235 {
220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
236 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 long val;
237 long val;
222
238
223 if (PyInt_Check(o))
239 if (PyInt_Check(o))
224 val = PyInt_AS_LONG(o);
240 val = PyInt_AS_LONG(o);
225 else if (PyLong_Check(o)) {
241 else if (PyLong_Check(o)) {
226 val = PyLong_AsLong(o);
242 val = PyLong_AsLong(o);
227 if (val == -1 && PyErr_Occurred())
243 if (val == -1 && PyErr_Occurred())
228 return -1;
244 return -1;
229 } else {
245 } else {
230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
246 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 return -1;
247 return -1;
232 }
248 }
233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
249 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 PyErr_SetString(PyExc_OverflowError,
250 PyErr_SetString(PyExc_OverflowError,
235 "Python value to large to convert to uint32_t");
251 "Python value to large to convert to uint32_t");
236 return -1;
252 return -1;
237 }
253 }
238 *v = (uint32_t)val;
254 *v = (uint32_t)val;
239 return 0;
255 return 0;
240 }
256 }
241
257
242 static PyObject *dirstate_unset;
258 static PyObject *dirstate_unset;
243
259
244 /*
260 /*
245 * Efficiently pack a dirstate object into its on-disk format.
261 * Efficiently pack a dirstate object into its on-disk format.
246 */
262 */
247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
263 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 {
264 {
249 PyObject *packobj = NULL;
265 PyObject *packobj = NULL;
250 PyObject *map, *copymap, *pl;
266 PyObject *map, *copymap, *pl;
251 Py_ssize_t nbytes, pos, l;
267 Py_ssize_t nbytes, pos, l;
252 PyObject *k, *v, *pn;
268 PyObject *k, *v, *pn;
253 char *p, *s;
269 char *p, *s;
254 double now;
270 double now;
255
271
256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
272 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 &PyDict_Type, &map, &PyDict_Type, &copymap,
273 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 &pl, &now))
274 &pl, &now))
259 return NULL;
275 return NULL;
260
276
261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
277 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
278 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 return NULL;
279 return NULL;
264 }
280 }
265
281
266 /* Figure out how much we need to allocate. */
282 /* Figure out how much we need to allocate. */
267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
283 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 PyObject *c;
284 PyObject *c;
269 if (!PyString_Check(k)) {
285 if (!PyString_Check(k)) {
270 PyErr_SetString(PyExc_TypeError, "expected string key");
286 PyErr_SetString(PyExc_TypeError, "expected string key");
271 goto bail;
287 goto bail;
272 }
288 }
273 nbytes += PyString_GET_SIZE(k) + 17;
289 nbytes += PyString_GET_SIZE(k) + 17;
274 c = PyDict_GetItem(copymap, k);
290 c = PyDict_GetItem(copymap, k);
275 if (c) {
291 if (c) {
276 if (!PyString_Check(c)) {
292 if (!PyString_Check(c)) {
277 PyErr_SetString(PyExc_TypeError,
293 PyErr_SetString(PyExc_TypeError,
278 "expected string key");
294 "expected string key");
279 goto bail;
295 goto bail;
280 }
296 }
281 nbytes += PyString_GET_SIZE(c) + 1;
297 nbytes += PyString_GET_SIZE(c) + 1;
282 }
298 }
283 }
299 }
284
300
285 packobj = PyString_FromStringAndSize(NULL, nbytes);
301 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 if (packobj == NULL)
302 if (packobj == NULL)
287 goto bail;
303 goto bail;
288
304
289 p = PyString_AS_STRING(packobj);
305 p = PyString_AS_STRING(packobj);
290
306
291 pn = PySequence_ITEM(pl, 0);
307 pn = PySequence_ITEM(pl, 0);
292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
308 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
309 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 goto bail;
310 goto bail;
295 }
311 }
296 memcpy(p, s, l);
312 memcpy(p, s, l);
297 p += 20;
313 p += 20;
298 pn = PySequence_ITEM(pl, 1);
314 pn = PySequence_ITEM(pl, 1);
299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
315 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
316 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 goto bail;
317 goto bail;
302 }
318 }
303 memcpy(p, s, l);
319 memcpy(p, s, l);
304 p += 20;
320 p += 20;
305
321
306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
322 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 uint32_t mode, size, mtime;
323 uint32_t mode, size, mtime;
308 Py_ssize_t len, l;
324 Py_ssize_t len, l;
309 PyObject *o;
325 PyObject *o;
310 char *s, *t;
326 char *s, *t;
311
327
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
328 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
329 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 goto bail;
330 goto bail;
315 }
331 }
316 o = PyTuple_GET_ITEM(v, 0);
332 o = PyTuple_GET_ITEM(v, 0);
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
333 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
334 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 goto bail;
335 goto bail;
320 }
336 }
321 *p++ = *s;
337 *p++ = *s;
322 if (getintat(v, 1, &mode) == -1)
338 if (getintat(v, 1, &mode) == -1)
323 goto bail;
339 goto bail;
324 if (getintat(v, 2, &size) == -1)
340 if (getintat(v, 2, &size) == -1)
325 goto bail;
341 goto bail;
326 if (getintat(v, 3, &mtime) == -1)
342 if (getintat(v, 3, &mtime) == -1)
327 goto bail;
343 goto bail;
328 if (*s == 'n' && mtime == (uint32_t)now) {
344 if (*s == 'n' && mtime == (uint32_t)now) {
329 /* See pure/parsers.py:pack_dirstate for why we do
345 /* See pure/parsers.py:pack_dirstate for why we do
330 * this. */
346 * this. */
331 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
347 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
332 goto bail;
348 goto bail;
333 mtime = -1;
349 mtime = -1;
334 }
350 }
335 putbe32(mode, p);
351 putbe32(mode, p);
336 putbe32(size, p + 4);
352 putbe32(size, p + 4);
337 putbe32(mtime, p + 8);
353 putbe32(mtime, p + 8);
338 t = p + 12;
354 t = p + 12;
339 p += 16;
355 p += 16;
340 len = PyString_GET_SIZE(k);
356 len = PyString_GET_SIZE(k);
341 memcpy(p, PyString_AS_STRING(k), len);
357 memcpy(p, PyString_AS_STRING(k), len);
342 p += len;
358 p += len;
343 o = PyDict_GetItem(copymap, k);
359 o = PyDict_GetItem(copymap, k);
344 if (o) {
360 if (o) {
345 *p++ = '\0';
361 *p++ = '\0';
346 l = PyString_GET_SIZE(o);
362 l = PyString_GET_SIZE(o);
347 memcpy(p, PyString_AS_STRING(o), l);
363 memcpy(p, PyString_AS_STRING(o), l);
348 p += l;
364 p += l;
349 len += l + 1;
365 len += l + 1;
350 }
366 }
351 putbe32((uint32_t)len, t);
367 putbe32((uint32_t)len, t);
352 }
368 }
353
369
354 pos = p - PyString_AS_STRING(packobj);
370 pos = p - PyString_AS_STRING(packobj);
355 if (pos != nbytes) {
371 if (pos != nbytes) {
356 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
372 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
357 (long)pos, (long)nbytes);
373 (long)pos, (long)nbytes);
358 goto bail;
374 goto bail;
359 }
375 }
360
376
361 return packobj;
377 return packobj;
362 bail:
378 bail:
363 Py_XDECREF(packobj);
379 Py_XDECREF(packobj);
364 return NULL;
380 return NULL;
365 }
381 }
366
382
367 /*
383 /*
368 * A base-16 trie for fast node->rev mapping.
384 * A base-16 trie for fast node->rev mapping.
369 *
385 *
370 * Positive value is index of the next node in the trie
386 * Positive value is index of the next node in the trie
371 * Negative value is a leaf: -(rev + 1)
387 * Negative value is a leaf: -(rev + 1)
372 * Zero is empty
388 * Zero is empty
373 */
389 */
374 typedef struct {
390 typedef struct {
375 int children[16];
391 int children[16];
376 } nodetree;
392 } nodetree;
377
393
378 /*
394 /*
379 * This class has two behaviours.
395 * This class has two behaviours.
380 *
396 *
381 * When used in a list-like way (with integer keys), we decode an
397 * When used in a list-like way (with integer keys), we decode an
382 * entry in a RevlogNG index file on demand. Our last entry is a
398 * entry in a RevlogNG index file on demand. Our last entry is a
383 * sentinel, always a nullid. We have limited support for
399 * sentinel, always a nullid. We have limited support for
384 * integer-keyed insert and delete, only at elements right before the
400 * integer-keyed insert and delete, only at elements right before the
385 * sentinel.
401 * sentinel.
386 *
402 *
387 * With string keys, we lazily perform a reverse mapping from node to
403 * With string keys, we lazily perform a reverse mapping from node to
388 * rev, using a base-16 trie.
404 * rev, using a base-16 trie.
389 */
405 */
390 typedef struct {
406 typedef struct {
391 PyObject_HEAD
407 PyObject_HEAD
392 /* Type-specific fields go here. */
408 /* Type-specific fields go here. */
393 PyObject *data; /* raw bytes of index */
409 PyObject *data; /* raw bytes of index */
394 PyObject **cache; /* cached tuples */
410 PyObject **cache; /* cached tuples */
395 const char **offsets; /* populated on demand */
411 const char **offsets; /* populated on demand */
396 Py_ssize_t raw_length; /* original number of elements */
412 Py_ssize_t raw_length; /* original number of elements */
397 Py_ssize_t length; /* current number of elements */
413 Py_ssize_t length; /* current number of elements */
398 PyObject *added; /* populated on demand */
414 PyObject *added; /* populated on demand */
399 PyObject *headrevs; /* cache, invalidated on changes */
415 PyObject *headrevs; /* cache, invalidated on changes */
400 nodetree *nt; /* base-16 trie */
416 nodetree *nt; /* base-16 trie */
401 int ntlength; /* # nodes in use */
417 int ntlength; /* # nodes in use */
402 int ntcapacity; /* # nodes allocated */
418 int ntcapacity; /* # nodes allocated */
403 int ntdepth; /* maximum depth of tree */
419 int ntdepth; /* maximum depth of tree */
404 int ntsplits; /* # splits performed */
420 int ntsplits; /* # splits performed */
405 int ntrev; /* last rev scanned */
421 int ntrev; /* last rev scanned */
406 int ntlookups; /* # lookups */
422 int ntlookups; /* # lookups */
407 int ntmisses; /* # lookups that miss the cache */
423 int ntmisses; /* # lookups that miss the cache */
408 int inlined;
424 int inlined;
409 } indexObject;
425 } indexObject;
410
426
411 static Py_ssize_t index_length(const indexObject *self)
427 static Py_ssize_t index_length(const indexObject *self)
412 {
428 {
413 if (self->added == NULL)
429 if (self->added == NULL)
414 return self->length;
430 return self->length;
415 return self->length + PyList_GET_SIZE(self->added);
431 return self->length + PyList_GET_SIZE(self->added);
416 }
432 }
417
433
418 static PyObject *nullentry;
434 static PyObject *nullentry;
419 static const char nullid[20];
435 static const char nullid[20];
420
436
421 static long inline_scan(indexObject *self, const char **offsets);
437 static long inline_scan(indexObject *self, const char **offsets);
422
438
423 #if LONG_MAX == 0x7fffffffL
439 #if LONG_MAX == 0x7fffffffL
424 static char *tuple_format = "Kiiiiiis#";
440 static char *tuple_format = "Kiiiiiis#";
425 #else
441 #else
426 static char *tuple_format = "kiiiiiis#";
442 static char *tuple_format = "kiiiiiis#";
427 #endif
443 #endif
428
444
429 /* A RevlogNG v1 index entry is 64 bytes long. */
445 /* A RevlogNG v1 index entry is 64 bytes long. */
430 static const long v1_hdrsize = 64;
446 static const long v1_hdrsize = 64;
431
447
432 /*
448 /*
433 * Return a pointer to the beginning of a RevlogNG record.
449 * Return a pointer to the beginning of a RevlogNG record.
434 */
450 */
435 static const char *index_deref(indexObject *self, Py_ssize_t pos)
451 static const char *index_deref(indexObject *self, Py_ssize_t pos)
436 {
452 {
437 if (self->inlined && pos > 0) {
453 if (self->inlined && pos > 0) {
438 if (self->offsets == NULL) {
454 if (self->offsets == NULL) {
439 self->offsets = malloc(self->raw_length *
455 self->offsets = malloc(self->raw_length *
440 sizeof(*self->offsets));
456 sizeof(*self->offsets));
441 if (self->offsets == NULL)
457 if (self->offsets == NULL)
442 return (const char *)PyErr_NoMemory();
458 return (const char *)PyErr_NoMemory();
443 inline_scan(self, self->offsets);
459 inline_scan(self, self->offsets);
444 }
460 }
445 return self->offsets[pos];
461 return self->offsets[pos];
446 }
462 }
447
463
448 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
464 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
449 }
465 }
450
466
451 /*
467 /*
452 * RevlogNG format (all in big endian, data may be inlined):
468 * RevlogNG format (all in big endian, data may be inlined):
453 * 6 bytes: offset
469 * 6 bytes: offset
454 * 2 bytes: flags
470 * 2 bytes: flags
455 * 4 bytes: compressed length
471 * 4 bytes: compressed length
456 * 4 bytes: uncompressed length
472 * 4 bytes: uncompressed length
457 * 4 bytes: base revision
473 * 4 bytes: base revision
458 * 4 bytes: link revision
474 * 4 bytes: link revision
459 * 4 bytes: parent 1 revision
475 * 4 bytes: parent 1 revision
460 * 4 bytes: parent 2 revision
476 * 4 bytes: parent 2 revision
461 * 32 bytes: nodeid (only 20 bytes used)
477 * 32 bytes: nodeid (only 20 bytes used)
462 */
478 */
463 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
479 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
464 {
480 {
465 uint64_t offset_flags;
481 uint64_t offset_flags;
466 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
482 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
467 const char *c_node_id;
483 const char *c_node_id;
468 const char *data;
484 const char *data;
469 Py_ssize_t length = index_length(self);
485 Py_ssize_t length = index_length(self);
470 PyObject *entry;
486 PyObject *entry;
471
487
472 if (pos < 0)
488 if (pos < 0)
473 pos += length;
489 pos += length;
474
490
475 if (pos < 0 || pos >= length) {
491 if (pos < 0 || pos >= length) {
476 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
492 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
477 return NULL;
493 return NULL;
478 }
494 }
479
495
480 if (pos == length - 1) {
496 if (pos == length - 1) {
481 Py_INCREF(nullentry);
497 Py_INCREF(nullentry);
482 return nullentry;
498 return nullentry;
483 }
499 }
484
500
485 if (pos >= self->length - 1) {
501 if (pos >= self->length - 1) {
486 PyObject *obj;
502 PyObject *obj;
487 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
503 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
488 Py_INCREF(obj);
504 Py_INCREF(obj);
489 return obj;
505 return obj;
490 }
506 }
491
507
492 if (self->cache) {
508 if (self->cache) {
493 if (self->cache[pos]) {
509 if (self->cache[pos]) {
494 Py_INCREF(self->cache[pos]);
510 Py_INCREF(self->cache[pos]);
495 return self->cache[pos];
511 return self->cache[pos];
496 }
512 }
497 } else {
513 } else {
498 self->cache = calloc(self->raw_length, sizeof(PyObject *));
514 self->cache = calloc(self->raw_length, sizeof(PyObject *));
499 if (self->cache == NULL)
515 if (self->cache == NULL)
500 return PyErr_NoMemory();
516 return PyErr_NoMemory();
501 }
517 }
502
518
503 data = index_deref(self, pos);
519 data = index_deref(self, pos);
504 if (data == NULL)
520 if (data == NULL)
505 return NULL;
521 return NULL;
506
522
507 offset_flags = getbe32(data + 4);
523 offset_flags = getbe32(data + 4);
508 if (pos == 0) /* mask out version number for the first entry */
524 if (pos == 0) /* mask out version number for the first entry */
509 offset_flags &= 0xFFFF;
525 offset_flags &= 0xFFFF;
510 else {
526 else {
511 uint32_t offset_high = getbe32(data);
527 uint32_t offset_high = getbe32(data);
512 offset_flags |= ((uint64_t)offset_high) << 32;
528 offset_flags |= ((uint64_t)offset_high) << 32;
513 }
529 }
514
530
515 comp_len = getbe32(data + 8);
531 comp_len = getbe32(data + 8);
516 uncomp_len = getbe32(data + 12);
532 uncomp_len = getbe32(data + 12);
517 base_rev = getbe32(data + 16);
533 base_rev = getbe32(data + 16);
518 link_rev = getbe32(data + 20);
534 link_rev = getbe32(data + 20);
519 parent_1 = getbe32(data + 24);
535 parent_1 = getbe32(data + 24);
520 parent_2 = getbe32(data + 28);
536 parent_2 = getbe32(data + 28);
521 c_node_id = data + 32;
537 c_node_id = data + 32;
522
538
523 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
539 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
524 uncomp_len, base_rev, link_rev,
540 uncomp_len, base_rev, link_rev,
525 parent_1, parent_2, c_node_id, 20);
541 parent_1, parent_2, c_node_id, 20);
526
542
527 if (entry)
543 if (entry)
528 PyObject_GC_UnTrack(entry);
544 PyObject_GC_UnTrack(entry);
529
545
530 self->cache[pos] = entry;
546 self->cache[pos] = entry;
531 Py_INCREF(entry);
547 Py_INCREF(entry);
532
548
533 return entry;
549 return entry;
534 }
550 }
535
551
536 /*
552 /*
537 * Return the 20-byte SHA of the node corresponding to the given rev.
553 * Return the 20-byte SHA of the node corresponding to the given rev.
538 */
554 */
539 static const char *index_node(indexObject *self, Py_ssize_t pos)
555 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 {
556 {
541 Py_ssize_t length = index_length(self);
557 Py_ssize_t length = index_length(self);
542 const char *data;
558 const char *data;
543
559
544 if (pos == length - 1 || pos == INT_MAX)
560 if (pos == length - 1 || pos == INT_MAX)
545 return nullid;
561 return nullid;
546
562
547 if (pos >= length)
563 if (pos >= length)
548 return NULL;
564 return NULL;
549
565
550 if (pos >= self->length - 1) {
566 if (pos >= self->length - 1) {
551 PyObject *tuple, *str;
567 PyObject *tuple, *str;
552 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
568 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
553 str = PyTuple_GetItem(tuple, 7);
569 str = PyTuple_GetItem(tuple, 7);
554 return str ? PyString_AS_STRING(str) : NULL;
570 return str ? PyString_AS_STRING(str) : NULL;
555 }
571 }
556
572
557 data = index_deref(self, pos);
573 data = index_deref(self, pos);
558 return data ? data + 32 : NULL;
574 return data ? data + 32 : NULL;
559 }
575 }
560
576
561 static int nt_insert(indexObject *self, const char *node, int rev);
577 static int nt_insert(indexObject *self, const char *node, int rev);
562
578
563 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
579 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
564 {
580 {
565 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
581 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
566 return -1;
582 return -1;
567 if (*nodelen == 20)
583 if (*nodelen == 20)
568 return 0;
584 return 0;
569 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
585 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
570 return -1;
586 return -1;
571 }
587 }
572
588
573 static PyObject *index_insert(indexObject *self, PyObject *args)
589 static PyObject *index_insert(indexObject *self, PyObject *args)
574 {
590 {
575 PyObject *obj;
591 PyObject *obj;
576 char *node;
592 char *node;
577 long offset;
593 long offset;
578 Py_ssize_t len, nodelen;
594 Py_ssize_t len, nodelen;
579
595
580 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
596 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
581 return NULL;
597 return NULL;
582
598
583 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
599 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
584 PyErr_SetString(PyExc_TypeError, "8-tuple required");
600 PyErr_SetString(PyExc_TypeError, "8-tuple required");
585 return NULL;
601 return NULL;
586 }
602 }
587
603
588 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
604 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
589 return NULL;
605 return NULL;
590
606
591 len = index_length(self);
607 len = index_length(self);
592
608
593 if (offset < 0)
609 if (offset < 0)
594 offset += len;
610 offset += len;
595
611
596 if (offset != len - 1) {
612 if (offset != len - 1) {
597 PyErr_SetString(PyExc_IndexError,
613 PyErr_SetString(PyExc_IndexError,
598 "insert only supported at index -1");
614 "insert only supported at index -1");
599 return NULL;
615 return NULL;
600 }
616 }
601
617
602 if (offset > INT_MAX) {
618 if (offset > INT_MAX) {
603 PyErr_SetString(PyExc_ValueError,
619 PyErr_SetString(PyExc_ValueError,
604 "currently only 2**31 revs supported");
620 "currently only 2**31 revs supported");
605 return NULL;
621 return NULL;
606 }
622 }
607
623
608 if (self->added == NULL) {
624 if (self->added == NULL) {
609 self->added = PyList_New(0);
625 self->added = PyList_New(0);
610 if (self->added == NULL)
626 if (self->added == NULL)
611 return NULL;
627 return NULL;
612 }
628 }
613
629
614 if (PyList_Append(self->added, obj) == -1)
630 if (PyList_Append(self->added, obj) == -1)
615 return NULL;
631 return NULL;
616
632
617 if (self->nt)
633 if (self->nt)
618 nt_insert(self, node, (int)offset);
634 nt_insert(self, node, (int)offset);
619
635
620 Py_CLEAR(self->headrevs);
636 Py_CLEAR(self->headrevs);
621 Py_RETURN_NONE;
637 Py_RETURN_NONE;
622 }
638 }
623
639
624 static void _index_clearcaches(indexObject *self)
640 static void _index_clearcaches(indexObject *self)
625 {
641 {
626 if (self->cache) {
642 if (self->cache) {
627 Py_ssize_t i;
643 Py_ssize_t i;
628
644
629 for (i = 0; i < self->raw_length; i++)
645 for (i = 0; i < self->raw_length; i++)
630 Py_CLEAR(self->cache[i]);
646 Py_CLEAR(self->cache[i]);
631 free(self->cache);
647 free(self->cache);
632 self->cache = NULL;
648 self->cache = NULL;
633 }
649 }
634 if (self->offsets) {
650 if (self->offsets) {
635 free(self->offsets);
651 free(self->offsets);
636 self->offsets = NULL;
652 self->offsets = NULL;
637 }
653 }
638 if (self->nt) {
654 if (self->nt) {
639 free(self->nt);
655 free(self->nt);
640 self->nt = NULL;
656 self->nt = NULL;
641 }
657 }
642 Py_CLEAR(self->headrevs);
658 Py_CLEAR(self->headrevs);
643 }
659 }
644
660
645 static PyObject *index_clearcaches(indexObject *self)
661 static PyObject *index_clearcaches(indexObject *self)
646 {
662 {
647 _index_clearcaches(self);
663 _index_clearcaches(self);
648 self->ntlength = self->ntcapacity = 0;
664 self->ntlength = self->ntcapacity = 0;
649 self->ntdepth = self->ntsplits = 0;
665 self->ntdepth = self->ntsplits = 0;
650 self->ntrev = -1;
666 self->ntrev = -1;
651 self->ntlookups = self->ntmisses = 0;
667 self->ntlookups = self->ntmisses = 0;
652 Py_RETURN_NONE;
668 Py_RETURN_NONE;
653 }
669 }
654
670
655 static PyObject *index_stats(indexObject *self)
671 static PyObject *index_stats(indexObject *self)
656 {
672 {
657 PyObject *obj = PyDict_New();
673 PyObject *obj = PyDict_New();
658
674
659 if (obj == NULL)
675 if (obj == NULL)
660 return NULL;
676 return NULL;
661
677
662 #define istat(__n, __d) \
678 #define istat(__n, __d) \
663 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
679 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
664 goto bail;
680 goto bail;
665
681
666 if (self->added) {
682 if (self->added) {
667 Py_ssize_t len = PyList_GET_SIZE(self->added);
683 Py_ssize_t len = PyList_GET_SIZE(self->added);
668 if (PyDict_SetItemString(obj, "index entries added",
684 if (PyDict_SetItemString(obj, "index entries added",
669 PyInt_FromSsize_t(len)) == -1)
685 PyInt_FromSsize_t(len)) == -1)
670 goto bail;
686 goto bail;
671 }
687 }
672
688
673 if (self->raw_length != self->length - 1)
689 if (self->raw_length != self->length - 1)
674 istat(raw_length, "revs on disk");
690 istat(raw_length, "revs on disk");
675 istat(length, "revs in memory");
691 istat(length, "revs in memory");
676 istat(ntcapacity, "node trie capacity");
692 istat(ntcapacity, "node trie capacity");
677 istat(ntdepth, "node trie depth");
693 istat(ntdepth, "node trie depth");
678 istat(ntlength, "node trie count");
694 istat(ntlength, "node trie count");
679 istat(ntlookups, "node trie lookups");
695 istat(ntlookups, "node trie lookups");
680 istat(ntmisses, "node trie misses");
696 istat(ntmisses, "node trie misses");
681 istat(ntrev, "node trie last rev scanned");
697 istat(ntrev, "node trie last rev scanned");
682 istat(ntsplits, "node trie splits");
698 istat(ntsplits, "node trie splits");
683
699
684 #undef istat
700 #undef istat
685
701
686 return obj;
702 return obj;
687
703
688 bail:
704 bail:
689 Py_XDECREF(obj);
705 Py_XDECREF(obj);
690 return NULL;
706 return NULL;
691 }
707 }
692
708
693 /*
709 /*
694 * When we cache a list, we want to be sure the caller can't mutate
710 * When we cache a list, we want to be sure the caller can't mutate
695 * the cached copy.
711 * the cached copy.
696 */
712 */
697 static PyObject *list_copy(PyObject *list)
713 static PyObject *list_copy(PyObject *list)
698 {
714 {
699 Py_ssize_t len = PyList_GET_SIZE(list);
715 Py_ssize_t len = PyList_GET_SIZE(list);
700 PyObject *newlist = PyList_New(len);
716 PyObject *newlist = PyList_New(len);
701 Py_ssize_t i;
717 Py_ssize_t i;
702
718
703 if (newlist == NULL)
719 if (newlist == NULL)
704 return NULL;
720 return NULL;
705
721
706 for (i = 0; i < len; i++) {
722 for (i = 0; i < len; i++) {
707 PyObject *obj = PyList_GET_ITEM(list, i);
723 PyObject *obj = PyList_GET_ITEM(list, i);
708 Py_INCREF(obj);
724 Py_INCREF(obj);
709 PyList_SET_ITEM(newlist, i, obj);
725 PyList_SET_ITEM(newlist, i, obj);
710 }
726 }
711
727
712 return newlist;
728 return newlist;
713 }
729 }
714
730
715 static PyObject *index_headrevs(indexObject *self)
731 static PyObject *index_headrevs(indexObject *self)
716 {
732 {
717 Py_ssize_t i, len, addlen;
733 Py_ssize_t i, len, addlen;
718 char *nothead = NULL;
734 char *nothead = NULL;
719 PyObject *heads;
735 PyObject *heads;
720
736
721 if (self->headrevs)
737 if (self->headrevs)
722 return list_copy(self->headrevs);
738 return list_copy(self->headrevs);
723
739
724 len = index_length(self) - 1;
740 len = index_length(self) - 1;
725 heads = PyList_New(0);
741 heads = PyList_New(0);
726 if (heads == NULL)
742 if (heads == NULL)
727 goto bail;
743 goto bail;
728 if (len == 0) {
744 if (len == 0) {
729 PyObject *nullid = PyInt_FromLong(-1);
745 PyObject *nullid = PyInt_FromLong(-1);
730 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
746 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
731 Py_XDECREF(nullid);
747 Py_XDECREF(nullid);
732 goto bail;
748 goto bail;
733 }
749 }
734 goto done;
750 goto done;
735 }
751 }
736
752
737 nothead = calloc(len, 1);
753 nothead = calloc(len, 1);
738 if (nothead == NULL)
754 if (nothead == NULL)
739 goto bail;
755 goto bail;
740
756
741 for (i = 0; i < self->raw_length; i++) {
757 for (i = 0; i < self->raw_length; i++) {
742 const char *data = index_deref(self, i);
758 const char *data = index_deref(self, i);
743 int parent_1 = getbe32(data + 24);
759 int parent_1 = getbe32(data + 24);
744 int parent_2 = getbe32(data + 28);
760 int parent_2 = getbe32(data + 28);
745 if (parent_1 >= 0)
761 if (parent_1 >= 0)
746 nothead[parent_1] = 1;
762 nothead[parent_1] = 1;
747 if (parent_2 >= 0)
763 if (parent_2 >= 0)
748 nothead[parent_2] = 1;
764 nothead[parent_2] = 1;
749 }
765 }
750
766
751 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
767 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
752
768
753 for (i = 0; i < addlen; i++) {
769 for (i = 0; i < addlen; i++) {
754 PyObject *rev = PyList_GET_ITEM(self->added, i);
770 PyObject *rev = PyList_GET_ITEM(self->added, i);
755 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
771 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
756 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
772 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
757 long parent_1, parent_2;
773 long parent_1, parent_2;
758
774
759 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
775 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
760 PyErr_SetString(PyExc_TypeError,
776 PyErr_SetString(PyExc_TypeError,
761 "revlog parents are invalid");
777 "revlog parents are invalid");
762 goto bail;
778 goto bail;
763 }
779 }
764 parent_1 = PyInt_AS_LONG(p1);
780 parent_1 = PyInt_AS_LONG(p1);
765 parent_2 = PyInt_AS_LONG(p2);
781 parent_2 = PyInt_AS_LONG(p2);
766 if (parent_1 >= 0)
782 if (parent_1 >= 0)
767 nothead[parent_1] = 1;
783 nothead[parent_1] = 1;
768 if (parent_2 >= 0)
784 if (parent_2 >= 0)
769 nothead[parent_2] = 1;
785 nothead[parent_2] = 1;
770 }
786 }
771
787
772 for (i = 0; i < len; i++) {
788 for (i = 0; i < len; i++) {
773 PyObject *head;
789 PyObject *head;
774
790
775 if (nothead[i])
791 if (nothead[i])
776 continue;
792 continue;
777 head = PyInt_FromLong(i);
793 head = PyInt_FromLong(i);
778 if (head == NULL || PyList_Append(heads, head) == -1) {
794 if (head == NULL || PyList_Append(heads, head) == -1) {
779 Py_XDECREF(head);
795 Py_XDECREF(head);
780 goto bail;
796 goto bail;
781 }
797 }
782 }
798 }
783
799
784 done:
800 done:
785 self->headrevs = heads;
801 self->headrevs = heads;
786 free(nothead);
802 free(nothead);
787 return list_copy(self->headrevs);
803 return list_copy(self->headrevs);
788 bail:
804 bail:
789 Py_XDECREF(heads);
805 Py_XDECREF(heads);
790 free(nothead);
806 free(nothead);
791 return NULL;
807 return NULL;
792 }
808 }
793
809
794 static inline int nt_level(const char *node, Py_ssize_t level)
810 static inline int nt_level(const char *node, Py_ssize_t level)
795 {
811 {
796 int v = node[level>>1];
812 int v = node[level>>1];
797 if (!(level & 1))
813 if (!(level & 1))
798 v >>= 4;
814 v >>= 4;
799 return v & 0xf;
815 return v & 0xf;
800 }
816 }
801
817
802 /*
818 /*
803 * Return values:
819 * Return values:
804 *
820 *
805 * -4: match is ambiguous (multiple candidates)
821 * -4: match is ambiguous (multiple candidates)
806 * -2: not found
822 * -2: not found
807 * rest: valid rev
823 * rest: valid rev
808 */
824 */
809 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
825 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
810 int hex)
826 int hex)
811 {
827 {
812 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
828 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
813 int level, maxlevel, off;
829 int level, maxlevel, off;
814
830
815 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
831 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
816 return -1;
832 return -1;
817
833
818 if (self->nt == NULL)
834 if (self->nt == NULL)
819 return -2;
835 return -2;
820
836
821 if (hex)
837 if (hex)
822 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
838 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
823 else
839 else
824 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
840 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
825
841
826 for (level = off = 0; level < maxlevel; level++) {
842 for (level = off = 0; level < maxlevel; level++) {
827 int k = getnybble(node, level);
843 int k = getnybble(node, level);
828 nodetree *n = &self->nt[off];
844 nodetree *n = &self->nt[off];
829 int v = n->children[k];
845 int v = n->children[k];
830
846
831 if (v < 0) {
847 if (v < 0) {
832 const char *n;
848 const char *n;
833 Py_ssize_t i;
849 Py_ssize_t i;
834
850
835 v = -v - 1;
851 v = -v - 1;
836 n = index_node(self, v);
852 n = index_node(self, v);
837 if (n == NULL)
853 if (n == NULL)
838 return -2;
854 return -2;
839 for (i = level; i < maxlevel; i++)
855 for (i = level; i < maxlevel; i++)
840 if (getnybble(node, i) != nt_level(n, i))
856 if (getnybble(node, i) != nt_level(n, i))
841 return -2;
857 return -2;
842 return v;
858 return v;
843 }
859 }
844 if (v == 0)
860 if (v == 0)
845 return -2;
861 return -2;
846 off = v;
862 off = v;
847 }
863 }
848 /* multiple matches against an ambiguous prefix */
864 /* multiple matches against an ambiguous prefix */
849 return -4;
865 return -4;
850 }
866 }
851
867
852 static int nt_new(indexObject *self)
868 static int nt_new(indexObject *self)
853 {
869 {
854 if (self->ntlength == self->ntcapacity) {
870 if (self->ntlength == self->ntcapacity) {
855 self->ntcapacity *= 2;
871 self->ntcapacity *= 2;
856 self->nt = realloc(self->nt,
872 self->nt = realloc(self->nt,
857 self->ntcapacity * sizeof(nodetree));
873 self->ntcapacity * sizeof(nodetree));
858 if (self->nt == NULL) {
874 if (self->nt == NULL) {
859 PyErr_SetString(PyExc_MemoryError, "out of memory");
875 PyErr_SetString(PyExc_MemoryError, "out of memory");
860 return -1;
876 return -1;
861 }
877 }
862 memset(&self->nt[self->ntlength], 0,
878 memset(&self->nt[self->ntlength], 0,
863 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
879 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
864 }
880 }
865 return self->ntlength++;
881 return self->ntlength++;
866 }
882 }
867
883
868 static int nt_insert(indexObject *self, const char *node, int rev)
884 static int nt_insert(indexObject *self, const char *node, int rev)
869 {
885 {
870 int level = 0;
886 int level = 0;
871 int off = 0;
887 int off = 0;
872
888
873 while (level < 40) {
889 while (level < 40) {
874 int k = nt_level(node, level);
890 int k = nt_level(node, level);
875 nodetree *n;
891 nodetree *n;
876 int v;
892 int v;
877
893
878 n = &self->nt[off];
894 n = &self->nt[off];
879 v = n->children[k];
895 v = n->children[k];
880
896
881 if (v == 0) {
897 if (v == 0) {
882 n->children[k] = -rev - 1;
898 n->children[k] = -rev - 1;
883 return 0;
899 return 0;
884 }
900 }
885 if (v < 0) {
901 if (v < 0) {
886 const char *oldnode = index_node(self, -v - 1);
902 const char *oldnode = index_node(self, -v - 1);
887 int noff;
903 int noff;
888
904
889 if (!oldnode || !memcmp(oldnode, node, 20)) {
905 if (!oldnode || !memcmp(oldnode, node, 20)) {
890 n->children[k] = -rev - 1;
906 n->children[k] = -rev - 1;
891 return 0;
907 return 0;
892 }
908 }
893 noff = nt_new(self);
909 noff = nt_new(self);
894 if (noff == -1)
910 if (noff == -1)
895 return -1;
911 return -1;
896 /* self->nt may have been changed by realloc */
912 /* self->nt may have been changed by realloc */
897 self->nt[off].children[k] = noff;
913 self->nt[off].children[k] = noff;
898 off = noff;
914 off = noff;
899 n = &self->nt[off];
915 n = &self->nt[off];
900 n->children[nt_level(oldnode, ++level)] = v;
916 n->children[nt_level(oldnode, ++level)] = v;
901 if (level > self->ntdepth)
917 if (level > self->ntdepth)
902 self->ntdepth = level;
918 self->ntdepth = level;
903 self->ntsplits += 1;
919 self->ntsplits += 1;
904 } else {
920 } else {
905 level += 1;
921 level += 1;
906 off = v;
922 off = v;
907 }
923 }
908 }
924 }
909
925
910 return -1;
926 return -1;
911 }
927 }
912
928
913 static int nt_init(indexObject *self)
929 static int nt_init(indexObject *self)
914 {
930 {
915 if (self->nt == NULL) {
931 if (self->nt == NULL) {
916 self->ntcapacity = self->raw_length < 4
932 self->ntcapacity = self->raw_length < 4
917 ? 4 : self->raw_length / 2;
933 ? 4 : self->raw_length / 2;
918 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
934 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
919 if (self->nt == NULL) {
935 if (self->nt == NULL) {
920 PyErr_NoMemory();
936 PyErr_NoMemory();
921 return -1;
937 return -1;
922 }
938 }
923 self->ntlength = 1;
939 self->ntlength = 1;
924 self->ntrev = (int)index_length(self) - 1;
940 self->ntrev = (int)index_length(self) - 1;
925 self->ntlookups = 1;
941 self->ntlookups = 1;
926 self->ntmisses = 0;
942 self->ntmisses = 0;
927 if (nt_insert(self, nullid, INT_MAX) == -1)
943 if (nt_insert(self, nullid, INT_MAX) == -1)
928 return -1;
944 return -1;
929 }
945 }
930 return 0;
946 return 0;
931 }
947 }
932
948
933 /*
949 /*
934 * Return values:
950 * Return values:
935 *
951 *
936 * -3: error (exception set)
952 * -3: error (exception set)
937 * -2: not found (no exception set)
953 * -2: not found (no exception set)
938 * rest: valid rev
954 * rest: valid rev
939 */
955 */
940 static int index_find_node(indexObject *self,
956 static int index_find_node(indexObject *self,
941 const char *node, Py_ssize_t nodelen)
957 const char *node, Py_ssize_t nodelen)
942 {
958 {
943 int rev;
959 int rev;
944
960
945 self->ntlookups++;
961 self->ntlookups++;
946 rev = nt_find(self, node, nodelen, 0);
962 rev = nt_find(self, node, nodelen, 0);
947 if (rev >= -1)
963 if (rev >= -1)
948 return rev;
964 return rev;
949
965
950 if (nt_init(self) == -1)
966 if (nt_init(self) == -1)
951 return -3;
967 return -3;
952
968
953 /*
969 /*
954 * For the first handful of lookups, we scan the entire index,
970 * For the first handful of lookups, we scan the entire index,
955 * and cache only the matching nodes. This optimizes for cases
971 * and cache only the matching nodes. This optimizes for cases
956 * like "hg tip", where only a few nodes are accessed.
972 * like "hg tip", where only a few nodes are accessed.
957 *
973 *
958 * After that, we cache every node we visit, using a single
974 * After that, we cache every node we visit, using a single
959 * scan amortized over multiple lookups. This gives the best
975 * scan amortized over multiple lookups. This gives the best
960 * bulk performance, e.g. for "hg log".
976 * bulk performance, e.g. for "hg log".
961 */
977 */
962 if (self->ntmisses++ < 4) {
978 if (self->ntmisses++ < 4) {
963 for (rev = self->ntrev - 1; rev >= 0; rev--) {
979 for (rev = self->ntrev - 1; rev >= 0; rev--) {
964 const char *n = index_node(self, rev);
980 const char *n = index_node(self, rev);
965 if (n == NULL)
981 if (n == NULL)
966 return -2;
982 return -2;
967 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
983 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
968 if (nt_insert(self, n, rev) == -1)
984 if (nt_insert(self, n, rev) == -1)
969 return -3;
985 return -3;
970 break;
986 break;
971 }
987 }
972 }
988 }
973 } else {
989 } else {
974 for (rev = self->ntrev - 1; rev >= 0; rev--) {
990 for (rev = self->ntrev - 1; rev >= 0; rev--) {
975 const char *n = index_node(self, rev);
991 const char *n = index_node(self, rev);
976 if (n == NULL) {
992 if (n == NULL) {
977 self->ntrev = rev + 1;
993 self->ntrev = rev + 1;
978 return -2;
994 return -2;
979 }
995 }
980 if (nt_insert(self, n, rev) == -1) {
996 if (nt_insert(self, n, rev) == -1) {
981 self->ntrev = rev + 1;
997 self->ntrev = rev + 1;
982 return -3;
998 return -3;
983 }
999 }
984 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1000 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
985 break;
1001 break;
986 }
1002 }
987 }
1003 }
988 self->ntrev = rev;
1004 self->ntrev = rev;
989 }
1005 }
990
1006
991 if (rev >= 0)
1007 if (rev >= 0)
992 return rev;
1008 return rev;
993 return -2;
1009 return -2;
994 }
1010 }
995
1011
996 static PyObject *raise_revlog_error(void)
1012 static PyObject *raise_revlog_error(void)
997 {
1013 {
998 static PyObject *errclass;
1014 static PyObject *errclass;
999 PyObject *mod = NULL, *errobj;
1015 PyObject *mod = NULL, *errobj;
1000
1016
1001 if (errclass == NULL) {
1017 if (errclass == NULL) {
1002 PyObject *dict;
1018 PyObject *dict;
1003
1019
1004 mod = PyImport_ImportModule("mercurial.error");
1020 mod = PyImport_ImportModule("mercurial.error");
1005 if (mod == NULL)
1021 if (mod == NULL)
1006 goto classfail;
1022 goto classfail;
1007
1023
1008 dict = PyModule_GetDict(mod);
1024 dict = PyModule_GetDict(mod);
1009 if (dict == NULL)
1025 if (dict == NULL)
1010 goto classfail;
1026 goto classfail;
1011
1027
1012 errclass = PyDict_GetItemString(dict, "RevlogError");
1028 errclass = PyDict_GetItemString(dict, "RevlogError");
1013 if (errclass == NULL) {
1029 if (errclass == NULL) {
1014 PyErr_SetString(PyExc_SystemError,
1030 PyErr_SetString(PyExc_SystemError,
1015 "could not find RevlogError");
1031 "could not find RevlogError");
1016 goto classfail;
1032 goto classfail;
1017 }
1033 }
1018 Py_INCREF(errclass);
1034 Py_INCREF(errclass);
1019 }
1035 }
1020
1036
1021 errobj = PyObject_CallFunction(errclass, NULL);
1037 errobj = PyObject_CallFunction(errclass, NULL);
1022 if (errobj == NULL)
1038 if (errobj == NULL)
1023 return NULL;
1039 return NULL;
1024 PyErr_SetObject(errclass, errobj);
1040 PyErr_SetObject(errclass, errobj);
1025 return errobj;
1041 return errobj;
1026
1042
1027 classfail:
1043 classfail:
1028 Py_XDECREF(mod);
1044 Py_XDECREF(mod);
1029 return NULL;
1045 return NULL;
1030 }
1046 }
1031
1047
1032 static PyObject *index_getitem(indexObject *self, PyObject *value)
1048 static PyObject *index_getitem(indexObject *self, PyObject *value)
1033 {
1049 {
1034 char *node;
1050 char *node;
1035 Py_ssize_t nodelen;
1051 Py_ssize_t nodelen;
1036 int rev;
1052 int rev;
1037
1053
1038 if (PyInt_Check(value))
1054 if (PyInt_Check(value))
1039 return index_get(self, PyInt_AS_LONG(value));
1055 return index_get(self, PyInt_AS_LONG(value));
1040
1056
1041 if (node_check(value, &node, &nodelen) == -1)
1057 if (node_check(value, &node, &nodelen) == -1)
1042 return NULL;
1058 return NULL;
1043 rev = index_find_node(self, node, nodelen);
1059 rev = index_find_node(self, node, nodelen);
1044 if (rev >= -1)
1060 if (rev >= -1)
1045 return PyInt_FromLong(rev);
1061 return PyInt_FromLong(rev);
1046 if (rev == -2)
1062 if (rev == -2)
1047 raise_revlog_error();
1063 raise_revlog_error();
1048 return NULL;
1064 return NULL;
1049 }
1065 }
1050
1066
1051 static int nt_partialmatch(indexObject *self, const char *node,
1067 static int nt_partialmatch(indexObject *self, const char *node,
1052 Py_ssize_t nodelen)
1068 Py_ssize_t nodelen)
1053 {
1069 {
1054 int rev;
1070 int rev;
1055
1071
1056 if (nt_init(self) == -1)
1072 if (nt_init(self) == -1)
1057 return -3;
1073 return -3;
1058
1074
1059 if (self->ntrev > 0) {
1075 if (self->ntrev > 0) {
1060 /* ensure that the radix tree is fully populated */
1076 /* ensure that the radix tree is fully populated */
1061 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1077 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1062 const char *n = index_node(self, rev);
1078 const char *n = index_node(self, rev);
1063 if (n == NULL)
1079 if (n == NULL)
1064 return -2;
1080 return -2;
1065 if (nt_insert(self, n, rev) == -1)
1081 if (nt_insert(self, n, rev) == -1)
1066 return -3;
1082 return -3;
1067 }
1083 }
1068 self->ntrev = rev;
1084 self->ntrev = rev;
1069 }
1085 }
1070
1086
1071 return nt_find(self, node, nodelen, 1);
1087 return nt_find(self, node, nodelen, 1);
1072 }
1088 }
1073
1089
1074 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1090 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1075 {
1091 {
1076 const char *fullnode;
1092 const char *fullnode;
1077 int nodelen;
1093 int nodelen;
1078 char *node;
1094 char *node;
1079 int rev, i;
1095 int rev, i;
1080
1096
1081 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1097 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1082 return NULL;
1098 return NULL;
1083
1099
1084 if (nodelen < 4) {
1100 if (nodelen < 4) {
1085 PyErr_SetString(PyExc_ValueError, "key too short");
1101 PyErr_SetString(PyExc_ValueError, "key too short");
1086 return NULL;
1102 return NULL;
1087 }
1103 }
1088
1104
1089 if (nodelen > 40) {
1105 if (nodelen > 40) {
1090 PyErr_SetString(PyExc_ValueError, "key too long");
1106 PyErr_SetString(PyExc_ValueError, "key too long");
1091 return NULL;
1107 return NULL;
1092 }
1108 }
1093
1109
1094 for (i = 0; i < nodelen; i++)
1110 for (i = 0; i < nodelen; i++)
1095 hexdigit(node, i);
1111 hexdigit(node, i);
1096 if (PyErr_Occurred()) {
1112 if (PyErr_Occurred()) {
1097 /* input contains non-hex characters */
1113 /* input contains non-hex characters */
1098 PyErr_Clear();
1114 PyErr_Clear();
1099 Py_RETURN_NONE;
1115 Py_RETURN_NONE;
1100 }
1116 }
1101
1117
1102 rev = nt_partialmatch(self, node, nodelen);
1118 rev = nt_partialmatch(self, node, nodelen);
1103
1119
1104 switch (rev) {
1120 switch (rev) {
1105 case -4:
1121 case -4:
1106 raise_revlog_error();
1122 raise_revlog_error();
1107 case -3:
1123 case -3:
1108 return NULL;
1124 return NULL;
1109 case -2:
1125 case -2:
1110 Py_RETURN_NONE;
1126 Py_RETURN_NONE;
1111 case -1:
1127 case -1:
1112 return PyString_FromStringAndSize(nullid, 20);
1128 return PyString_FromStringAndSize(nullid, 20);
1113 }
1129 }
1114
1130
1115 fullnode = index_node(self, rev);
1131 fullnode = index_node(self, rev);
1116 if (fullnode == NULL) {
1132 if (fullnode == NULL) {
1117 PyErr_Format(PyExc_IndexError,
1133 PyErr_Format(PyExc_IndexError,
1118 "could not access rev %d", rev);
1134 "could not access rev %d", rev);
1119 return NULL;
1135 return NULL;
1120 }
1136 }
1121 return PyString_FromStringAndSize(fullnode, 20);
1137 return PyString_FromStringAndSize(fullnode, 20);
1122 }
1138 }
1123
1139
1124 static PyObject *index_m_get(indexObject *self, PyObject *args)
1140 static PyObject *index_m_get(indexObject *self, PyObject *args)
1125 {
1141 {
1126 Py_ssize_t nodelen;
1142 Py_ssize_t nodelen;
1127 PyObject *val;
1143 PyObject *val;
1128 char *node;
1144 char *node;
1129 int rev;
1145 int rev;
1130
1146
1131 if (!PyArg_ParseTuple(args, "O", &val))
1147 if (!PyArg_ParseTuple(args, "O", &val))
1132 return NULL;
1148 return NULL;
1133 if (node_check(val, &node, &nodelen) == -1)
1149 if (node_check(val, &node, &nodelen) == -1)
1134 return NULL;
1150 return NULL;
1135 rev = index_find_node(self, node, nodelen);
1151 rev = index_find_node(self, node, nodelen);
1136 if (rev == -3)
1152 if (rev == -3)
1137 return NULL;
1153 return NULL;
1138 if (rev == -2)
1154 if (rev == -2)
1139 Py_RETURN_NONE;
1155 Py_RETURN_NONE;
1140 return PyInt_FromLong(rev);
1156 return PyInt_FromLong(rev);
1141 }
1157 }
1142
1158
1143 static int index_contains(indexObject *self, PyObject *value)
1159 static int index_contains(indexObject *self, PyObject *value)
1144 {
1160 {
1145 char *node;
1161 char *node;
1146 Py_ssize_t nodelen;
1162 Py_ssize_t nodelen;
1147
1163
1148 if (PyInt_Check(value)) {
1164 if (PyInt_Check(value)) {
1149 long rev = PyInt_AS_LONG(value);
1165 long rev = PyInt_AS_LONG(value);
1150 return rev >= -1 && rev < index_length(self);
1166 return rev >= -1 && rev < index_length(self);
1151 }
1167 }
1152
1168
1153 if (node_check(value, &node, &nodelen) == -1)
1169 if (node_check(value, &node, &nodelen) == -1)
1154 return -1;
1170 return -1;
1155
1171
1156 switch (index_find_node(self, node, nodelen)) {
1172 switch (index_find_node(self, node, nodelen)) {
1157 case -3:
1173 case -3:
1158 return -1;
1174 return -1;
1159 case -2:
1175 case -2:
1160 return 0;
1176 return 0;
1161 default:
1177 default:
1162 return 1;
1178 return 1;
1163 }
1179 }
1164 }
1180 }
1165
1181
1166 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1182 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1167 {
1183 {
1168 if (rev >= self->length - 1) {
1184 if (rev >= self->length - 1) {
1169 PyObject *tuple = PyList_GET_ITEM(self->added,
1185 PyObject *tuple = PyList_GET_ITEM(self->added,
1170 rev - self->length + 1);
1186 rev - self->length + 1);
1171 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1187 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1172 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1188 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1173 } else {
1189 } else {
1174 const char *data = index_deref(self, rev);
1190 const char *data = index_deref(self, rev);
1175 ps[0] = getbe32(data + 24);
1191 ps[0] = getbe32(data + 24);
1176 ps[1] = getbe32(data + 28);
1192 ps[1] = getbe32(data + 28);
1177 }
1193 }
1178 }
1194 }
1179
1195
1180 typedef uint64_t bitmask;
1196 typedef uint64_t bitmask;
1181
1197
1182 /*
1198 /*
1183 * Given a disjoint set of revs, return all candidates for the
1199 * Given a disjoint set of revs, return all candidates for the
1184 * greatest common ancestor. In revset notation, this is the set
1200 * greatest common ancestor. In revset notation, this is the set
1185 * "heads(::a and ::b and ...)"
1201 * "heads(::a and ::b and ...)"
1186 */
1202 */
1187 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1203 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1188 int revcount)
1204 int revcount)
1189 {
1205 {
1190 const bitmask allseen = (1ull << revcount) - 1;
1206 const bitmask allseen = (1ull << revcount) - 1;
1191 const bitmask poison = 1ull << revcount;
1207 const bitmask poison = 1ull << revcount;
1192 PyObject *gca = PyList_New(0);
1208 PyObject *gca = PyList_New(0);
1193 int i, v, interesting, left;
1209 int i, v, interesting, left;
1194 int maxrev = -1;
1210 int maxrev = -1;
1195 long sp;
1211 long sp;
1196 bitmask *seen;
1212 bitmask *seen;
1197
1213
1198 for (i = 0; i < revcount; i++) {
1214 for (i = 0; i < revcount; i++) {
1199 if (revs[i] > maxrev)
1215 if (revs[i] > maxrev)
1200 maxrev = revs[i];
1216 maxrev = revs[i];
1201 }
1217 }
1202
1218
1203 seen = calloc(sizeof(*seen), maxrev + 1);
1219 seen = calloc(sizeof(*seen), maxrev + 1);
1204 if (seen == NULL)
1220 if (seen == NULL)
1205 return PyErr_NoMemory();
1221 return PyErr_NoMemory();
1206
1222
1207 for (i = 0; i < revcount; i++)
1223 for (i = 0; i < revcount; i++)
1208 seen[revs[i]] = 1ull << i;
1224 seen[revs[i]] = 1ull << i;
1209
1225
1210 interesting = left = revcount;
1226 interesting = left = revcount;
1211
1227
1212 for (v = maxrev; v >= 0 && interesting; v--) {
1228 for (v = maxrev; v >= 0 && interesting; v--) {
1213 long sv = seen[v];
1229 long sv = seen[v];
1214 int parents[2];
1230 int parents[2];
1215
1231
1216 if (!sv)
1232 if (!sv)
1217 continue;
1233 continue;
1218
1234
1219 if (sv < poison) {
1235 if (sv < poison) {
1220 interesting -= 1;
1236 interesting -= 1;
1221 if (sv == allseen) {
1237 if (sv == allseen) {
1222 PyObject *obj = PyInt_FromLong(v);
1238 PyObject *obj = PyInt_FromLong(v);
1223 if (obj == NULL)
1239 if (obj == NULL)
1224 goto bail;
1240 goto bail;
1225 if (PyList_Append(gca, obj) == -1) {
1241 if (PyList_Append(gca, obj) == -1) {
1226 Py_DECREF(obj);
1242 Py_DECREF(obj);
1227 goto bail;
1243 goto bail;
1228 }
1244 }
1229 sv |= poison;
1245 sv |= poison;
1230 for (i = 0; i < revcount; i++) {
1246 for (i = 0; i < revcount; i++) {
1231 if (revs[i] == v) {
1247 if (revs[i] == v) {
1232 if (--left <= 1)
1248 if (--left <= 1)
1233 goto done;
1249 goto done;
1234 break;
1250 break;
1235 }
1251 }
1236 }
1252 }
1237 }
1253 }
1238 }
1254 }
1239 index_get_parents(self, v, parents);
1255 index_get_parents(self, v, parents);
1240
1256
1241 for (i = 0; i < 2; i++) {
1257 for (i = 0; i < 2; i++) {
1242 int p = parents[i];
1258 int p = parents[i];
1243 if (p == -1)
1259 if (p == -1)
1244 continue;
1260 continue;
1245 sp = seen[p];
1261 sp = seen[p];
1246 if (sv < poison) {
1262 if (sv < poison) {
1247 if (sp == 0) {
1263 if (sp == 0) {
1248 seen[p] = sv;
1264 seen[p] = sv;
1249 interesting++;
1265 interesting++;
1250 }
1266 }
1251 else if (sp != sv)
1267 else if (sp != sv)
1252 seen[p] |= sv;
1268 seen[p] |= sv;
1253 } else {
1269 } else {
1254 if (sp && sp < poison)
1270 if (sp && sp < poison)
1255 interesting--;
1271 interesting--;
1256 seen[p] = sv;
1272 seen[p] = sv;
1257 }
1273 }
1258 }
1274 }
1259 }
1275 }
1260
1276
1261 done:
1277 done:
1262 free(seen);
1278 free(seen);
1263 return gca;
1279 return gca;
1264 bail:
1280 bail:
1265 free(seen);
1281 free(seen);
1266 Py_XDECREF(gca);
1282 Py_XDECREF(gca);
1267 return NULL;
1283 return NULL;
1268 }
1284 }
1269
1285
1270 /*
1286 /*
1271 * Given a disjoint set of revs, return the subset with the longest
1287 * Given a disjoint set of revs, return the subset with the longest
1272 * path to the root.
1288 * path to the root.
1273 */
1289 */
1274 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1290 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1275 {
1291 {
1276 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1292 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1277 static const Py_ssize_t capacity = 24;
1293 static const Py_ssize_t capacity = 24;
1278 int *depth, *interesting = NULL;
1294 int *depth, *interesting = NULL;
1279 int i, j, v, ninteresting;
1295 int i, j, v, ninteresting;
1280 PyObject *dict = NULL, *keys;
1296 PyObject *dict = NULL, *keys;
1281 long *seen = NULL;
1297 long *seen = NULL;
1282 int maxrev = -1;
1298 int maxrev = -1;
1283 long final;
1299 long final;
1284
1300
1285 if (revcount > capacity) {
1301 if (revcount > capacity) {
1286 PyErr_Format(PyExc_OverflowError,
1302 PyErr_Format(PyExc_OverflowError,
1287 "bitset size (%ld) > capacity (%ld)",
1303 "bitset size (%ld) > capacity (%ld)",
1288 (long)revcount, (long)capacity);
1304 (long)revcount, (long)capacity);
1289 return NULL;
1305 return NULL;
1290 }
1306 }
1291
1307
1292 for (i = 0; i < revcount; i++) {
1308 for (i = 0; i < revcount; i++) {
1293 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1309 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1294 if (n > maxrev)
1310 if (n > maxrev)
1295 maxrev = n;
1311 maxrev = n;
1296 }
1312 }
1297
1313
1298 depth = calloc(sizeof(*depth), maxrev + 1);
1314 depth = calloc(sizeof(*depth), maxrev + 1);
1299 if (depth == NULL)
1315 if (depth == NULL)
1300 return PyErr_NoMemory();
1316 return PyErr_NoMemory();
1301
1317
1302 seen = calloc(sizeof(*seen), maxrev + 1);
1318 seen = calloc(sizeof(*seen), maxrev + 1);
1303 if (seen == NULL) {
1319 if (seen == NULL) {
1304 PyErr_NoMemory();
1320 PyErr_NoMemory();
1305 goto bail;
1321 goto bail;
1306 }
1322 }
1307
1323
1308 interesting = calloc(sizeof(*interesting), 2 << revcount);
1324 interesting = calloc(sizeof(*interesting), 2 << revcount);
1309 if (interesting == NULL) {
1325 if (interesting == NULL) {
1310 PyErr_NoMemory();
1326 PyErr_NoMemory();
1311 goto bail;
1327 goto bail;
1312 }
1328 }
1313
1329
1314 if (PyList_Sort(revs) == -1)
1330 if (PyList_Sort(revs) == -1)
1315 goto bail;
1331 goto bail;
1316
1332
1317 for (i = 0; i < revcount; i++) {
1333 for (i = 0; i < revcount; i++) {
1318 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1334 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1319 long b = 1l << i;
1335 long b = 1l << i;
1320 depth[n] = 1;
1336 depth[n] = 1;
1321 seen[n] = b;
1337 seen[n] = b;
1322 interesting[b] = 1;
1338 interesting[b] = 1;
1323 }
1339 }
1324
1340
1325 ninteresting = (int)revcount;
1341 ninteresting = (int)revcount;
1326
1342
1327 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1343 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1328 int dv = depth[v];
1344 int dv = depth[v];
1329 int parents[2];
1345 int parents[2];
1330 long sv;
1346 long sv;
1331
1347
1332 if (dv == 0)
1348 if (dv == 0)
1333 continue;
1349 continue;
1334
1350
1335 sv = seen[v];
1351 sv = seen[v];
1336 index_get_parents(self, v, parents);
1352 index_get_parents(self, v, parents);
1337
1353
1338 for (i = 0; i < 2; i++) {
1354 for (i = 0; i < 2; i++) {
1339 int p = parents[i];
1355 int p = parents[i];
1340 long nsp, sp;
1356 long nsp, sp;
1341 int dp;
1357 int dp;
1342
1358
1343 if (p == -1)
1359 if (p == -1)
1344 continue;
1360 continue;
1345
1361
1346 dp = depth[p];
1362 dp = depth[p];
1347 nsp = sp = seen[p];
1363 nsp = sp = seen[p];
1348 if (dp <= dv) {
1364 if (dp <= dv) {
1349 depth[p] = dv + 1;
1365 depth[p] = dv + 1;
1350 if (sp != sv) {
1366 if (sp != sv) {
1351 interesting[sv] += 1;
1367 interesting[sv] += 1;
1352 nsp = seen[p] = sv;
1368 nsp = seen[p] = sv;
1353 if (sp) {
1369 if (sp) {
1354 interesting[sp] -= 1;
1370 interesting[sp] -= 1;
1355 if (interesting[sp] == 0)
1371 if (interesting[sp] == 0)
1356 ninteresting -= 1;
1372 ninteresting -= 1;
1357 }
1373 }
1358 }
1374 }
1359 }
1375 }
1360 else if (dv == dp - 1) {
1376 else if (dv == dp - 1) {
1361 nsp = sp | sv;
1377 nsp = sp | sv;
1362 if (nsp == sp)
1378 if (nsp == sp)
1363 continue;
1379 continue;
1364 seen[p] = nsp;
1380 seen[p] = nsp;
1365 interesting[sp] -= 1;
1381 interesting[sp] -= 1;
1366 if (interesting[sp] == 0 && interesting[nsp] > 0)
1382 if (interesting[sp] == 0 && interesting[nsp] > 0)
1367 ninteresting -= 1;
1383 ninteresting -= 1;
1368 interesting[nsp] += 1;
1384 interesting[nsp] += 1;
1369 }
1385 }
1370 }
1386 }
1371 interesting[sv] -= 1;
1387 interesting[sv] -= 1;
1372 if (interesting[sv] == 0)
1388 if (interesting[sv] == 0)
1373 ninteresting -= 1;
1389 ninteresting -= 1;
1374 }
1390 }
1375
1391
1376 final = 0;
1392 final = 0;
1377 j = ninteresting;
1393 j = ninteresting;
1378 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1394 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1379 if (interesting[i] == 0)
1395 if (interesting[i] == 0)
1380 continue;
1396 continue;
1381 final |= i;
1397 final |= i;
1382 j -= 1;
1398 j -= 1;
1383 }
1399 }
1384 if (final == 0)
1400 if (final == 0)
1385 return PyList_New(0);
1401 return PyList_New(0);
1386
1402
1387 dict = PyDict_New();
1403 dict = PyDict_New();
1388 if (dict == NULL)
1404 if (dict == NULL)
1389 goto bail;
1405 goto bail;
1390
1406
1391 for (i = 0; i < revcount; i++) {
1407 for (i = 0; i < revcount; i++) {
1392 PyObject *key;
1408 PyObject *key;
1393
1409
1394 if ((final & (1 << i)) == 0)
1410 if ((final & (1 << i)) == 0)
1395 continue;
1411 continue;
1396
1412
1397 key = PyList_GET_ITEM(revs, i);
1413 key = PyList_GET_ITEM(revs, i);
1398 Py_INCREF(key);
1414 Py_INCREF(key);
1399 Py_INCREF(Py_None);
1415 Py_INCREF(Py_None);
1400 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1416 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1401 Py_DECREF(key);
1417 Py_DECREF(key);
1402 Py_DECREF(Py_None);
1418 Py_DECREF(Py_None);
1403 goto bail;
1419 goto bail;
1404 }
1420 }
1405 }
1421 }
1406
1422
1407 keys = PyDict_Keys(dict);
1423 keys = PyDict_Keys(dict);
1408
1424
1409 free(depth);
1425 free(depth);
1410 free(seen);
1426 free(seen);
1411 free(interesting);
1427 free(interesting);
1412 Py_DECREF(dict);
1428 Py_DECREF(dict);
1413
1429
1414 return keys;
1430 return keys;
1415 bail:
1431 bail:
1416 free(depth);
1432 free(depth);
1417 free(seen);
1433 free(seen);
1418 free(interesting);
1434 free(interesting);
1419 Py_XDECREF(dict);
1435 Py_XDECREF(dict);
1420
1436
1421 return NULL;
1437 return NULL;
1422 }
1438 }
1423
1439
1424 /*
1440 /*
1425 * Given a (possibly overlapping) set of revs, return the greatest
1441 * Given a (possibly overlapping) set of revs, return the greatest
1426 * common ancestors: those with the longest path to the root.
1442 * common ancestors: those with the longest path to the root.
1427 */
1443 */
1428 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1444 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1429 {
1445 {
1430 PyObject *ret = NULL, *gca = NULL;
1446 PyObject *ret = NULL, *gca = NULL;
1431 Py_ssize_t argcount, i, len;
1447 Py_ssize_t argcount, i, len;
1432 bitmask repeat = 0;
1448 bitmask repeat = 0;
1433 int revcount = 0;
1449 int revcount = 0;
1434 int *revs;
1450 int *revs;
1435
1451
1436 argcount = PySequence_Length(args);
1452 argcount = PySequence_Length(args);
1437 revs = malloc(argcount * sizeof(*revs));
1453 revs = malloc(argcount * sizeof(*revs));
1438 if (argcount > 0 && revs == NULL)
1454 if (argcount > 0 && revs == NULL)
1439 return PyErr_NoMemory();
1455 return PyErr_NoMemory();
1440 len = index_length(self) - 1;
1456 len = index_length(self) - 1;
1441
1457
1442 for (i = 0; i < argcount; i++) {
1458 for (i = 0; i < argcount; i++) {
1443 static const int capacity = 24;
1459 static const int capacity = 24;
1444 PyObject *obj = PySequence_GetItem(args, i);
1460 PyObject *obj = PySequence_GetItem(args, i);
1445 bitmask x;
1461 bitmask x;
1446 long val;
1462 long val;
1447
1463
1448 if (!PyInt_Check(obj)) {
1464 if (!PyInt_Check(obj)) {
1449 PyErr_SetString(PyExc_TypeError,
1465 PyErr_SetString(PyExc_TypeError,
1450 "arguments must all be ints");
1466 "arguments must all be ints");
1451 goto bail;
1467 goto bail;
1452 }
1468 }
1453 val = PyInt_AsLong(obj);
1469 val = PyInt_AsLong(obj);
1454 if (val == -1) {
1470 if (val == -1) {
1455 ret = PyList_New(0);
1471 ret = PyList_New(0);
1456 goto done;
1472 goto done;
1457 }
1473 }
1458 if (val < 0 || val >= len) {
1474 if (val < 0 || val >= len) {
1459 PyErr_SetString(PyExc_IndexError,
1475 PyErr_SetString(PyExc_IndexError,
1460 "index out of range");
1476 "index out of range");
1461 goto bail;
1477 goto bail;
1462 }
1478 }
1463 /* this cheesy bloom filter lets us avoid some more
1479 /* this cheesy bloom filter lets us avoid some more
1464 * expensive duplicate checks in the common set-is-disjoint
1480 * expensive duplicate checks in the common set-is-disjoint
1465 * case */
1481 * case */
1466 x = 1ull << (val & 0x3f);
1482 x = 1ull << (val & 0x3f);
1467 if (repeat & x) {
1483 if (repeat & x) {
1468 int k;
1484 int k;
1469 for (k = 0; k < revcount; k++) {
1485 for (k = 0; k < revcount; k++) {
1470 if (val == revs[k])
1486 if (val == revs[k])
1471 goto duplicate;
1487 goto duplicate;
1472 }
1488 }
1473 }
1489 }
1474 else repeat |= x;
1490 else repeat |= x;
1475 if (revcount >= capacity) {
1491 if (revcount >= capacity) {
1476 PyErr_Format(PyExc_OverflowError,
1492 PyErr_Format(PyExc_OverflowError,
1477 "bitset size (%d) > capacity (%d)",
1493 "bitset size (%d) > capacity (%d)",
1478 revcount, capacity);
1494 revcount, capacity);
1479 goto bail;
1495 goto bail;
1480 }
1496 }
1481 revs[revcount++] = (int)val;
1497 revs[revcount++] = (int)val;
1482 duplicate:;
1498 duplicate:;
1483 }
1499 }
1484
1500
1485 if (revcount == 0) {
1501 if (revcount == 0) {
1486 ret = PyList_New(0);
1502 ret = PyList_New(0);
1487 goto done;
1503 goto done;
1488 }
1504 }
1489 if (revcount == 1) {
1505 if (revcount == 1) {
1490 PyObject *obj;
1506 PyObject *obj;
1491 ret = PyList_New(1);
1507 ret = PyList_New(1);
1492 if (ret == NULL)
1508 if (ret == NULL)
1493 goto bail;
1509 goto bail;
1494 obj = PyInt_FromLong(revs[0]);
1510 obj = PyInt_FromLong(revs[0]);
1495 if (obj == NULL)
1511 if (obj == NULL)
1496 goto bail;
1512 goto bail;
1497 PyList_SET_ITEM(ret, 0, obj);
1513 PyList_SET_ITEM(ret, 0, obj);
1498 goto done;
1514 goto done;
1499 }
1515 }
1500
1516
1501 gca = find_gca_candidates(self, revs, revcount);
1517 gca = find_gca_candidates(self, revs, revcount);
1502 if (gca == NULL)
1518 if (gca == NULL)
1503 goto bail;
1519 goto bail;
1504
1520
1505 if (PyList_GET_SIZE(gca) <= 1) {
1521 if (PyList_GET_SIZE(gca) <= 1) {
1506 ret = gca;
1522 ret = gca;
1507 Py_INCREF(gca);
1523 Py_INCREF(gca);
1508 }
1524 }
1509 else if (PyList_GET_SIZE(gca) == 1) {
1525 else if (PyList_GET_SIZE(gca) == 1) {
1510 ret = PyList_GET_ITEM(gca, 0);
1526 ret = PyList_GET_ITEM(gca, 0);
1511 Py_INCREF(ret);
1527 Py_INCREF(ret);
1512 }
1528 }
1513 else ret = find_deepest(self, gca);
1529 else ret = find_deepest(self, gca);
1514
1530
1515 done:
1531 done:
1516 free(revs);
1532 free(revs);
1517 Py_XDECREF(gca);
1533 Py_XDECREF(gca);
1518
1534
1519 return ret;
1535 return ret;
1520
1536
1521 bail:
1537 bail:
1522 free(revs);
1538 free(revs);
1523 Py_XDECREF(gca);
1539 Py_XDECREF(gca);
1524 Py_XDECREF(ret);
1540 Py_XDECREF(ret);
1525 return NULL;
1541 return NULL;
1526 }
1542 }
1527
1543
1528 /*
1544 /*
1529 * Invalidate any trie entries introduced by added revs.
1545 * Invalidate any trie entries introduced by added revs.
1530 */
1546 */
1531 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1547 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1532 {
1548 {
1533 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1549 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1534
1550
1535 for (i = start; i < len; i++) {
1551 for (i = start; i < len; i++) {
1536 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1552 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1537 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1553 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1538
1554
1539 nt_insert(self, PyString_AS_STRING(node), -1);
1555 nt_insert(self, PyString_AS_STRING(node), -1);
1540 }
1556 }
1541
1557
1542 if (start == 0)
1558 if (start == 0)
1543 Py_CLEAR(self->added);
1559 Py_CLEAR(self->added);
1544 }
1560 }
1545
1561
1546 /*
1562 /*
1547 * Delete a numeric range of revs, which must be at the end of the
1563 * Delete a numeric range of revs, which must be at the end of the
1548 * range, but exclude the sentinel nullid entry.
1564 * range, but exclude the sentinel nullid entry.
1549 */
1565 */
1550 static int index_slice_del(indexObject *self, PyObject *item)
1566 static int index_slice_del(indexObject *self, PyObject *item)
1551 {
1567 {
1552 Py_ssize_t start, stop, step, slicelength;
1568 Py_ssize_t start, stop, step, slicelength;
1553 Py_ssize_t length = index_length(self);
1569 Py_ssize_t length = index_length(self);
1554 int ret = 0;
1570 int ret = 0;
1555
1571
1556 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1572 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1557 &start, &stop, &step, &slicelength) < 0)
1573 &start, &stop, &step, &slicelength) < 0)
1558 return -1;
1574 return -1;
1559
1575
1560 if (slicelength <= 0)
1576 if (slicelength <= 0)
1561 return 0;
1577 return 0;
1562
1578
1563 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1579 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1564 stop = start;
1580 stop = start;
1565
1581
1566 if (step < 0) {
1582 if (step < 0) {
1567 stop = start + 1;
1583 stop = start + 1;
1568 start = stop + step*(slicelength - 1) - 1;
1584 start = stop + step*(slicelength - 1) - 1;
1569 step = -step;
1585 step = -step;
1570 }
1586 }
1571
1587
1572 if (step != 1) {
1588 if (step != 1) {
1573 PyErr_SetString(PyExc_ValueError,
1589 PyErr_SetString(PyExc_ValueError,
1574 "revlog index delete requires step size of 1");
1590 "revlog index delete requires step size of 1");
1575 return -1;
1591 return -1;
1576 }
1592 }
1577
1593
1578 if (stop != length - 1) {
1594 if (stop != length - 1) {
1579 PyErr_SetString(PyExc_IndexError,
1595 PyErr_SetString(PyExc_IndexError,
1580 "revlog index deletion indices are invalid");
1596 "revlog index deletion indices are invalid");
1581 return -1;
1597 return -1;
1582 }
1598 }
1583
1599
1584 if (start < self->length - 1) {
1600 if (start < self->length - 1) {
1585 if (self->nt) {
1601 if (self->nt) {
1586 Py_ssize_t i;
1602 Py_ssize_t i;
1587
1603
1588 for (i = start + 1; i < self->length - 1; i++) {
1604 for (i = start + 1; i < self->length - 1; i++) {
1589 const char *node = index_node(self, i);
1605 const char *node = index_node(self, i);
1590
1606
1591 if (node)
1607 if (node)
1592 nt_insert(self, node, -1);
1608 nt_insert(self, node, -1);
1593 }
1609 }
1594 if (self->added)
1610 if (self->added)
1595 nt_invalidate_added(self, 0);
1611 nt_invalidate_added(self, 0);
1596 if (self->ntrev > start)
1612 if (self->ntrev > start)
1597 self->ntrev = (int)start;
1613 self->ntrev = (int)start;
1598 }
1614 }
1599 self->length = start + 1;
1615 self->length = start + 1;
1600 if (start < self->raw_length) {
1616 if (start < self->raw_length) {
1601 if (self->cache) {
1617 if (self->cache) {
1602 Py_ssize_t i;
1618 Py_ssize_t i;
1603 for (i = start; i < self->raw_length; i++)
1619 for (i = start; i < self->raw_length; i++)
1604 Py_CLEAR(self->cache[i]);
1620 Py_CLEAR(self->cache[i]);
1605 }
1621 }
1606 self->raw_length = start;
1622 self->raw_length = start;
1607 }
1623 }
1608 goto done;
1624 goto done;
1609 }
1625 }
1610
1626
1611 if (self->nt) {
1627 if (self->nt) {
1612 nt_invalidate_added(self, start - self->length + 1);
1628 nt_invalidate_added(self, start - self->length + 1);
1613 if (self->ntrev > start)
1629 if (self->ntrev > start)
1614 self->ntrev = (int)start;
1630 self->ntrev = (int)start;
1615 }
1631 }
1616 if (self->added)
1632 if (self->added)
1617 ret = PyList_SetSlice(self->added, start - self->length + 1,
1633 ret = PyList_SetSlice(self->added, start - self->length + 1,
1618 PyList_GET_SIZE(self->added), NULL);
1634 PyList_GET_SIZE(self->added), NULL);
1619 done:
1635 done:
1620 Py_CLEAR(self->headrevs);
1636 Py_CLEAR(self->headrevs);
1621 return ret;
1637 return ret;
1622 }
1638 }
1623
1639
1624 /*
1640 /*
1625 * Supported ops:
1641 * Supported ops:
1626 *
1642 *
1627 * slice deletion
1643 * slice deletion
1628 * string assignment (extend node->rev mapping)
1644 * string assignment (extend node->rev mapping)
1629 * string deletion (shrink node->rev mapping)
1645 * string deletion (shrink node->rev mapping)
1630 */
1646 */
1631 static int index_assign_subscript(indexObject *self, PyObject *item,
1647 static int index_assign_subscript(indexObject *self, PyObject *item,
1632 PyObject *value)
1648 PyObject *value)
1633 {
1649 {
1634 char *node;
1650 char *node;
1635 Py_ssize_t nodelen;
1651 Py_ssize_t nodelen;
1636 long rev;
1652 long rev;
1637
1653
1638 if (PySlice_Check(item) && value == NULL)
1654 if (PySlice_Check(item) && value == NULL)
1639 return index_slice_del(self, item);
1655 return index_slice_del(self, item);
1640
1656
1641 if (node_check(item, &node, &nodelen) == -1)
1657 if (node_check(item, &node, &nodelen) == -1)
1642 return -1;
1658 return -1;
1643
1659
1644 if (value == NULL)
1660 if (value == NULL)
1645 return self->nt ? nt_insert(self, node, -1) : 0;
1661 return self->nt ? nt_insert(self, node, -1) : 0;
1646 rev = PyInt_AsLong(value);
1662 rev = PyInt_AsLong(value);
1647 if (rev > INT_MAX || rev < 0) {
1663 if (rev > INT_MAX || rev < 0) {
1648 if (!PyErr_Occurred())
1664 if (!PyErr_Occurred())
1649 PyErr_SetString(PyExc_ValueError, "rev out of range");
1665 PyErr_SetString(PyExc_ValueError, "rev out of range");
1650 return -1;
1666 return -1;
1651 }
1667 }
1652 return nt_insert(self, node, (int)rev);
1668 return nt_insert(self, node, (int)rev);
1653 }
1669 }
1654
1670
1655 /*
1671 /*
1656 * Find all RevlogNG entries in an index that has inline data. Update
1672 * Find all RevlogNG entries in an index that has inline data. Update
1657 * the optional "offsets" table with those entries.
1673 * the optional "offsets" table with those entries.
1658 */
1674 */
1659 static long inline_scan(indexObject *self, const char **offsets)
1675 static long inline_scan(indexObject *self, const char **offsets)
1660 {
1676 {
1661 const char *data = PyString_AS_STRING(self->data);
1677 const char *data = PyString_AS_STRING(self->data);
1662 const char *end = data + PyString_GET_SIZE(self->data);
1678 const char *end = data + PyString_GET_SIZE(self->data);
1663 long incr = v1_hdrsize;
1679 long incr = v1_hdrsize;
1664 Py_ssize_t len = 0;
1680 Py_ssize_t len = 0;
1665
1681
1666 while (data + v1_hdrsize <= end) {
1682 while (data + v1_hdrsize <= end) {
1667 uint32_t comp_len;
1683 uint32_t comp_len;
1668 const char *old_data;
1684 const char *old_data;
1669 /* 3rd element of header is length of compressed inline data */
1685 /* 3rd element of header is length of compressed inline data */
1670 comp_len = getbe32(data + 8);
1686 comp_len = getbe32(data + 8);
1671 incr = v1_hdrsize + comp_len;
1687 incr = v1_hdrsize + comp_len;
1672 if (incr < v1_hdrsize)
1688 if (incr < v1_hdrsize)
1673 break;
1689 break;
1674 if (offsets)
1690 if (offsets)
1675 offsets[len] = data;
1691 offsets[len] = data;
1676 len++;
1692 len++;
1677 old_data = data;
1693 old_data = data;
1678 data += incr;
1694 data += incr;
1679 if (data <= old_data)
1695 if (data <= old_data)
1680 break;
1696 break;
1681 }
1697 }
1682
1698
1683 if (data != end && data + v1_hdrsize != end) {
1699 if (data != end && data + v1_hdrsize != end) {
1684 if (!PyErr_Occurred())
1700 if (!PyErr_Occurred())
1685 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1701 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1686 return -1;
1702 return -1;
1687 }
1703 }
1688
1704
1689 return len;
1705 return len;
1690 }
1706 }
1691
1707
1692 static int index_init(indexObject *self, PyObject *args)
1708 static int index_init(indexObject *self, PyObject *args)
1693 {
1709 {
1694 PyObject *data_obj, *inlined_obj;
1710 PyObject *data_obj, *inlined_obj;
1695 Py_ssize_t size;
1711 Py_ssize_t size;
1696
1712
1697 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1713 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1698 return -1;
1714 return -1;
1699 if (!PyString_Check(data_obj)) {
1715 if (!PyString_Check(data_obj)) {
1700 PyErr_SetString(PyExc_TypeError, "data is not a string");
1716 PyErr_SetString(PyExc_TypeError, "data is not a string");
1701 return -1;
1717 return -1;
1702 }
1718 }
1703 size = PyString_GET_SIZE(data_obj);
1719 size = PyString_GET_SIZE(data_obj);
1704
1720
1705 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1721 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1706 self->data = data_obj;
1722 self->data = data_obj;
1707 self->cache = NULL;
1723 self->cache = NULL;
1708
1724
1709 self->added = NULL;
1725 self->added = NULL;
1710 self->headrevs = NULL;
1726 self->headrevs = NULL;
1711 self->offsets = NULL;
1727 self->offsets = NULL;
1712 self->nt = NULL;
1728 self->nt = NULL;
1713 self->ntlength = self->ntcapacity = 0;
1729 self->ntlength = self->ntcapacity = 0;
1714 self->ntdepth = self->ntsplits = 0;
1730 self->ntdepth = self->ntsplits = 0;
1715 self->ntlookups = self->ntmisses = 0;
1731 self->ntlookups = self->ntmisses = 0;
1716 self->ntrev = -1;
1732 self->ntrev = -1;
1717 Py_INCREF(self->data);
1733 Py_INCREF(self->data);
1718
1734
1719 if (self->inlined) {
1735 if (self->inlined) {
1720 long len = inline_scan(self, NULL);
1736 long len = inline_scan(self, NULL);
1721 if (len == -1)
1737 if (len == -1)
1722 goto bail;
1738 goto bail;
1723 self->raw_length = len;
1739 self->raw_length = len;
1724 self->length = len + 1;
1740 self->length = len + 1;
1725 } else {
1741 } else {
1726 if (size % v1_hdrsize) {
1742 if (size % v1_hdrsize) {
1727 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1743 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1728 goto bail;
1744 goto bail;
1729 }
1745 }
1730 self->raw_length = size / v1_hdrsize;
1746 self->raw_length = size / v1_hdrsize;
1731 self->length = self->raw_length + 1;
1747 self->length = self->raw_length + 1;
1732 }
1748 }
1733
1749
1734 return 0;
1750 return 0;
1735 bail:
1751 bail:
1736 return -1;
1752 return -1;
1737 }
1753 }
1738
1754
1739 static PyObject *index_nodemap(indexObject *self)
1755 static PyObject *index_nodemap(indexObject *self)
1740 {
1756 {
1741 Py_INCREF(self);
1757 Py_INCREF(self);
1742 return (PyObject *)self;
1758 return (PyObject *)self;
1743 }
1759 }
1744
1760
1745 static void index_dealloc(indexObject *self)
1761 static void index_dealloc(indexObject *self)
1746 {
1762 {
1747 _index_clearcaches(self);
1763 _index_clearcaches(self);
1748 Py_DECREF(self->data);
1764 Py_DECREF(self->data);
1749 Py_XDECREF(self->added);
1765 Py_XDECREF(self->added);
1750 PyObject_Del(self);
1766 PyObject_Del(self);
1751 }
1767 }
1752
1768
1753 static PySequenceMethods index_sequence_methods = {
1769 static PySequenceMethods index_sequence_methods = {
1754 (lenfunc)index_length, /* sq_length */
1770 (lenfunc)index_length, /* sq_length */
1755 0, /* sq_concat */
1771 0, /* sq_concat */
1756 0, /* sq_repeat */
1772 0, /* sq_repeat */
1757 (ssizeargfunc)index_get, /* sq_item */
1773 (ssizeargfunc)index_get, /* sq_item */
1758 0, /* sq_slice */
1774 0, /* sq_slice */
1759 0, /* sq_ass_item */
1775 0, /* sq_ass_item */
1760 0, /* sq_ass_slice */
1776 0, /* sq_ass_slice */
1761 (objobjproc)index_contains, /* sq_contains */
1777 (objobjproc)index_contains, /* sq_contains */
1762 };
1778 };
1763
1779
1764 static PyMappingMethods index_mapping_methods = {
1780 static PyMappingMethods index_mapping_methods = {
1765 (lenfunc)index_length, /* mp_length */
1781 (lenfunc)index_length, /* mp_length */
1766 (binaryfunc)index_getitem, /* mp_subscript */
1782 (binaryfunc)index_getitem, /* mp_subscript */
1767 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1783 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1768 };
1784 };
1769
1785
1770 static PyMethodDef index_methods[] = {
1786 static PyMethodDef index_methods[] = {
1771 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1787 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1772 "return the gca set of the given revs"},
1788 "return the gca set of the given revs"},
1773 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1789 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1774 "clear the index caches"},
1790 "clear the index caches"},
1775 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1791 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1776 "get an index entry"},
1792 "get an index entry"},
1777 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1793 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1778 "get head revisions"},
1794 "get head revisions"},
1779 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1795 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1780 "insert an index entry"},
1796 "insert an index entry"},
1781 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1797 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1782 "match a potentially ambiguous node ID"},
1798 "match a potentially ambiguous node ID"},
1783 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1799 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1784 "stats for the index"},
1800 "stats for the index"},
1785 {NULL} /* Sentinel */
1801 {NULL} /* Sentinel */
1786 };
1802 };
1787
1803
1788 static PyGetSetDef index_getset[] = {
1804 static PyGetSetDef index_getset[] = {
1789 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1805 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1790 {NULL} /* Sentinel */
1806 {NULL} /* Sentinel */
1791 };
1807 };
1792
1808
1793 static PyTypeObject indexType = {
1809 static PyTypeObject indexType = {
1794 PyObject_HEAD_INIT(NULL)
1810 PyObject_HEAD_INIT(NULL)
1795 0, /* ob_size */
1811 0, /* ob_size */
1796 "parsers.index", /* tp_name */
1812 "parsers.index", /* tp_name */
1797 sizeof(indexObject), /* tp_basicsize */
1813 sizeof(indexObject), /* tp_basicsize */
1798 0, /* tp_itemsize */
1814 0, /* tp_itemsize */
1799 (destructor)index_dealloc, /* tp_dealloc */
1815 (destructor)index_dealloc, /* tp_dealloc */
1800 0, /* tp_print */
1816 0, /* tp_print */
1801 0, /* tp_getattr */
1817 0, /* tp_getattr */
1802 0, /* tp_setattr */
1818 0, /* tp_setattr */
1803 0, /* tp_compare */
1819 0, /* tp_compare */
1804 0, /* tp_repr */
1820 0, /* tp_repr */
1805 0, /* tp_as_number */
1821 0, /* tp_as_number */
1806 &index_sequence_methods, /* tp_as_sequence */
1822 &index_sequence_methods, /* tp_as_sequence */
1807 &index_mapping_methods, /* tp_as_mapping */
1823 &index_mapping_methods, /* tp_as_mapping */
1808 0, /* tp_hash */
1824 0, /* tp_hash */
1809 0, /* tp_call */
1825 0, /* tp_call */
1810 0, /* tp_str */
1826 0, /* tp_str */
1811 0, /* tp_getattro */
1827 0, /* tp_getattro */
1812 0, /* tp_setattro */
1828 0, /* tp_setattro */
1813 0, /* tp_as_buffer */
1829 0, /* tp_as_buffer */
1814 Py_TPFLAGS_DEFAULT, /* tp_flags */
1830 Py_TPFLAGS_DEFAULT, /* tp_flags */
1815 "revlog index", /* tp_doc */
1831 "revlog index", /* tp_doc */
1816 0, /* tp_traverse */
1832 0, /* tp_traverse */
1817 0, /* tp_clear */
1833 0, /* tp_clear */
1818 0, /* tp_richcompare */
1834 0, /* tp_richcompare */
1819 0, /* tp_weaklistoffset */
1835 0, /* tp_weaklistoffset */
1820 0, /* tp_iter */
1836 0, /* tp_iter */
1821 0, /* tp_iternext */
1837 0, /* tp_iternext */
1822 index_methods, /* tp_methods */
1838 index_methods, /* tp_methods */
1823 0, /* tp_members */
1839 0, /* tp_members */
1824 index_getset, /* tp_getset */
1840 index_getset, /* tp_getset */
1825 0, /* tp_base */
1841 0, /* tp_base */
1826 0, /* tp_dict */
1842 0, /* tp_dict */
1827 0, /* tp_descr_get */
1843 0, /* tp_descr_get */
1828 0, /* tp_descr_set */
1844 0, /* tp_descr_set */
1829 0, /* tp_dictoffset */
1845 0, /* tp_dictoffset */
1830 (initproc)index_init, /* tp_init */
1846 (initproc)index_init, /* tp_init */
1831 0, /* tp_alloc */
1847 0, /* tp_alloc */
1832 };
1848 };
1833
1849
1834 /*
1850 /*
1835 * returns a tuple of the form (index, index, cache) with elements as
1851 * returns a tuple of the form (index, index, cache) with elements as
1836 * follows:
1852 * follows:
1837 *
1853 *
1838 * index: an index object that lazily parses RevlogNG records
1854 * index: an index object that lazily parses RevlogNG records
1839 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1855 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1840 *
1856 *
1841 * added complications are for backwards compatibility
1857 * added complications are for backwards compatibility
1842 */
1858 */
1843 static PyObject *parse_index2(PyObject *self, PyObject *args)
1859 static PyObject *parse_index2(PyObject *self, PyObject *args)
1844 {
1860 {
1845 PyObject *tuple = NULL, *cache = NULL;
1861 PyObject *tuple = NULL, *cache = NULL;
1846 indexObject *idx;
1862 indexObject *idx;
1847 int ret;
1863 int ret;
1848
1864
1849 idx = PyObject_New(indexObject, &indexType);
1865 idx = PyObject_New(indexObject, &indexType);
1850 if (idx == NULL)
1866 if (idx == NULL)
1851 goto bail;
1867 goto bail;
1852
1868
1853 ret = index_init(idx, args);
1869 ret = index_init(idx, args);
1854 if (ret == -1)
1870 if (ret == -1)
1855 goto bail;
1871 goto bail;
1856
1872
1857 if (idx->inlined) {
1873 if (idx->inlined) {
1858 cache = Py_BuildValue("iO", 0, idx->data);
1874 cache = Py_BuildValue("iO", 0, idx->data);
1859 if (cache == NULL)
1875 if (cache == NULL)
1860 goto bail;
1876 goto bail;
1861 } else {
1877 } else {
1862 cache = Py_None;
1878 cache = Py_None;
1863 Py_INCREF(cache);
1879 Py_INCREF(cache);
1864 }
1880 }
1865
1881
1866 tuple = Py_BuildValue("NN", idx, cache);
1882 tuple = Py_BuildValue("NN", idx, cache);
1867 if (!tuple)
1883 if (!tuple)
1868 goto bail;
1884 goto bail;
1869 return tuple;
1885 return tuple;
1870
1886
1871 bail:
1887 bail:
1872 Py_XDECREF(idx);
1888 Py_XDECREF(idx);
1873 Py_XDECREF(cache);
1889 Py_XDECREF(cache);
1874 Py_XDECREF(tuple);
1890 Py_XDECREF(tuple);
1875 return NULL;
1891 return NULL;
1876 }
1892 }
1877
1893
1878 static char parsers_doc[] = "Efficient content parsing.";
1894 static char parsers_doc[] = "Efficient content parsing.";
1879
1895
1880 PyObject *encodedir(PyObject *self, PyObject *args);
1896 PyObject *encodedir(PyObject *self, PyObject *args);
1881 PyObject *pathencode(PyObject *self, PyObject *args);
1897 PyObject *pathencode(PyObject *self, PyObject *args);
1882 PyObject *lowerencode(PyObject *self, PyObject *args);
1898 PyObject *lowerencode(PyObject *self, PyObject *args);
1883
1899
1884 static PyMethodDef methods[] = {
1900 static PyMethodDef methods[] = {
1885 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1901 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1886 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1902 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1887 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1903 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1888 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1904 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1889 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1905 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1890 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1906 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1891 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1907 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1892 {NULL, NULL}
1908 {NULL, NULL}
1893 };
1909 };
1894
1910
1895 void dirs_module_init(PyObject *mod);
1911 void dirs_module_init(PyObject *mod);
1896
1912
1897 static void module_init(PyObject *mod)
1913 static void module_init(PyObject *mod)
1898 {
1914 {
1899 dirs_module_init(mod);
1915 dirs_module_init(mod);
1900
1916
1901 indexType.tp_new = PyType_GenericNew;
1917 indexType.tp_new = PyType_GenericNew;
1902 if (PyType_Ready(&indexType) < 0)
1918 if (PyType_Ready(&indexType) < 0)
1903 return;
1919 return;
1904 Py_INCREF(&indexType);
1920 Py_INCREF(&indexType);
1905
1921
1906 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1922 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1907
1923
1908 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1924 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1909 -1, -1, -1, -1, nullid, 20);
1925 -1, -1, -1, -1, nullid, 20);
1910 if (nullentry)
1926 if (nullentry)
1911 PyObject_GC_UnTrack(nullentry);
1927 PyObject_GC_UnTrack(nullentry);
1912
1928
1913 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1929 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1914 }
1930 }
1915
1931
1916 #ifdef IS_PY3K
1932 #ifdef IS_PY3K
1917 static struct PyModuleDef parsers_module = {
1933 static struct PyModuleDef parsers_module = {
1918 PyModuleDef_HEAD_INIT,
1934 PyModuleDef_HEAD_INIT,
1919 "parsers",
1935 "parsers",
1920 parsers_doc,
1936 parsers_doc,
1921 -1,
1937 -1,
1922 methods
1938 methods
1923 };
1939 };
1924
1940
1925 PyMODINIT_FUNC PyInit_parsers(void)
1941 PyMODINIT_FUNC PyInit_parsers(void)
1926 {
1942 {
1927 PyObject *mod = PyModule_Create(&parsers_module);
1943 PyObject *mod = PyModule_Create(&parsers_module);
1928 module_init(mod);
1944 module_init(mod);
1929 return mod;
1945 return mod;
1930 }
1946 }
1931 #else
1947 #else
1932 PyMODINIT_FUNC initparsers(void)
1948 PyMODINIT_FUNC initparsers(void)
1933 {
1949 {
1934 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1950 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1935 module_init(mod);
1951 module_init(mod);
1936 }
1952 }
1937 #endif
1953 #endif
General Comments 0
You need to be logged in to leave comments. Login now