##// END OF EJS Templates
parsers: fix an integer size warning issued by clang
Bryan O'Sullivan -
r17356:511dfb34 default
parent child Browse files
Show More
@@ -1,1554 +1,1555 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <stddef.h>
12 #include <string.h>
13 #include <string.h>
13
14
14 #include "util.h"
15 #include "util.h"
15
16
16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 static inline int hexdigit(const char *p, Py_ssize_t off)
17 {
18 {
18 char c = p[off];
19 char c = p[off];
19
20
20 if (c >= '0' && c <= '9')
21 if (c >= '0' && c <= '9')
21 return c - '0';
22 return c - '0';
22 if (c >= 'a' && c <= 'f')
23 if (c >= 'a' && c <= 'f')
23 return c - 'a' + 10;
24 return c - 'a' + 10;
24 if (c >= 'A' && c <= 'F')
25 if (c >= 'A' && c <= 'F')
25 return c - 'A' + 10;
26 return c - 'A' + 10;
26
27
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 return 0;
29 return 0;
29 }
30 }
30
31
31 /*
32 /*
32 * Turn a hex-encoded string into binary.
33 * Turn a hex-encoded string into binary.
33 */
34 */
34 static PyObject *unhexlify(const char *str, int len)
35 static PyObject *unhexlify(const char *str, int len)
35 {
36 {
36 PyObject *ret;
37 PyObject *ret;
37 char *d;
38 char *d;
38 int i;
39 int i;
39
40
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41
42
42 if (!ret)
43 if (!ret)
43 return NULL;
44 return NULL;
44
45
45 d = PyBytes_AsString(ret);
46 d = PyBytes_AsString(ret);
46
47
47 for (i = 0; i < len;) {
48 for (i = 0; i < len;) {
48 int hi = hexdigit(str, i++);
49 int hi = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
50 int lo = hexdigit(str, i++);
50 *d++ = (hi << 4) | lo;
51 *d++ = (hi << 4) | lo;
51 }
52 }
52
53
53 return ret;
54 return ret;
54 }
55 }
55
56
56 /*
57 /*
57 * This code assumes that a manifest is stitched together with newline
58 * This code assumes that a manifest is stitched together with newline
58 * ('\n') characters.
59 * ('\n') characters.
59 */
60 */
60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 {
62 {
62 PyObject *mfdict, *fdict;
63 PyObject *mfdict, *fdict;
63 char *str, *cur, *start, *zero;
64 char *str, *cur, *start, *zero;
64 int len;
65 int len;
65
66
66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 &PyDict_Type, &mfdict,
68 &PyDict_Type, &mfdict,
68 &PyDict_Type, &fdict,
69 &PyDict_Type, &fdict,
69 &str, &len))
70 &str, &len))
70 goto quit;
71 goto quit;
71
72
72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 PyObject *file = NULL, *node = NULL;
74 PyObject *file = NULL, *node = NULL;
74 PyObject *flags = NULL;
75 PyObject *flags = NULL;
75 int nlen;
76 ptrdiff_t nlen;
76
77
77 if (!*cur) {
78 if (!*cur) {
78 zero = cur;
79 zero = cur;
79 continue;
80 continue;
80 }
81 }
81 else if (*cur != '\n')
82 else if (*cur != '\n')
82 continue;
83 continue;
83
84
84 if (!zero) {
85 if (!zero) {
85 PyErr_SetString(PyExc_ValueError,
86 PyErr_SetString(PyExc_ValueError,
86 "manifest entry has no separator");
87 "manifest entry has no separator");
87 goto quit;
88 goto quit;
88 }
89 }
89
90
90 file = PyBytes_FromStringAndSize(start, zero - start);
91 file = PyBytes_FromStringAndSize(start, zero - start);
91
92
92 if (!file)
93 if (!file)
93 goto bail;
94 goto bail;
94
95
95 nlen = cur - zero - 1;
96 nlen = cur - zero - 1;
96
97
97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
98 if (!node)
99 if (!node)
99 goto bail;
100 goto bail;
100
101
101 if (nlen > 40) {
102 if (nlen > 40) {
102 flags = PyBytes_FromStringAndSize(zero + 41,
103 flags = PyBytes_FromStringAndSize(zero + 41,
103 nlen - 40);
104 nlen - 40);
104 if (!flags)
105 if (!flags)
105 goto bail;
106 goto bail;
106
107
107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 if (PyDict_SetItem(fdict, file, flags) == -1)
108 goto bail;
109 goto bail;
109 }
110 }
110
111
111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 if (PyDict_SetItem(mfdict, file, node) == -1)
112 goto bail;
113 goto bail;
113
114
114 start = cur + 1;
115 start = cur + 1;
115 zero = NULL;
116 zero = NULL;
116
117
117 Py_XDECREF(flags);
118 Py_XDECREF(flags);
118 Py_XDECREF(node);
119 Py_XDECREF(node);
119 Py_XDECREF(file);
120 Py_XDECREF(file);
120 continue;
121 continue;
121 bail:
122 bail:
122 Py_XDECREF(flags);
123 Py_XDECREF(flags);
123 Py_XDECREF(node);
124 Py_XDECREF(node);
124 Py_XDECREF(file);
125 Py_XDECREF(file);
125 goto quit;
126 goto quit;
126 }
127 }
127
128
128 if (len > 0 && *(cur - 1) != '\n') {
129 if (len > 0 && *(cur - 1) != '\n') {
129 PyErr_SetString(PyExc_ValueError,
130 PyErr_SetString(PyExc_ValueError,
130 "manifest contains trailing garbage");
131 "manifest contains trailing garbage");
131 goto quit;
132 goto quit;
132 }
133 }
133
134
134 Py_INCREF(Py_None);
135 Py_INCREF(Py_None);
135 return Py_None;
136 return Py_None;
136 quit:
137 quit:
137 return NULL;
138 return NULL;
138 }
139 }
139
140
140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 {
142 {
142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 char *str, *cur, *end, *cpos;
145 char *str, *cur, *end, *cpos;
145 int state, mode, size, mtime;
146 int state, mode, size, mtime;
146 unsigned int flen;
147 unsigned int flen;
147 int len;
148 int len;
148
149
149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 &PyDict_Type, &dmap,
151 &PyDict_Type, &dmap,
151 &PyDict_Type, &cmap,
152 &PyDict_Type, &cmap,
152 &str, &len))
153 &str, &len))
153 goto quit;
154 goto quit;
154
155
155 /* read parents */
156 /* read parents */
156 if (len < 40)
157 if (len < 40)
157 goto quit;
158 goto quit;
158
159
159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 if (!parents)
161 if (!parents)
161 goto quit;
162 goto quit;
162
163
163 /* read filenames */
164 /* read filenames */
164 cur = str + 40;
165 cur = str + 40;
165 end = str + len;
166 end = str + len;
166
167
167 while (cur < end - 17) {
168 while (cur < end - 17) {
168 /* unpack header */
169 /* unpack header */
169 state = *cur;
170 state = *cur;
170 mode = getbe32(cur + 1);
171 mode = getbe32(cur + 1);
171 size = getbe32(cur + 5);
172 size = getbe32(cur + 5);
172 mtime = getbe32(cur + 9);
173 mtime = getbe32(cur + 9);
173 flen = getbe32(cur + 13);
174 flen = getbe32(cur + 13);
174 cur += 17;
175 cur += 17;
175 if (cur + flen > end || cur + flen < cur) {
176 if (cur + flen > end || cur + flen < cur) {
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 goto quit;
178 goto quit;
178 }
179 }
179
180
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 if (!entry)
182 if (!entry)
182 goto quit;
183 goto quit;
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184
185
185 cpos = memchr(cur, 0, flen);
186 cpos = memchr(cur, 0, flen);
186 if (cpos) {
187 if (cpos) {
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 cname = PyBytes_FromStringAndSize(cpos + 1,
189 flen - (cpos - cur) - 1);
190 flen - (cpos - cur) - 1);
190 if (!fname || !cname ||
191 if (!fname || !cname ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 PyDict_SetItem(dmap, fname, entry) == -1)
193 PyDict_SetItem(dmap, fname, entry) == -1)
193 goto quit;
194 goto quit;
194 Py_DECREF(cname);
195 Py_DECREF(cname);
195 } else {
196 } else {
196 fname = PyBytes_FromStringAndSize(cur, flen);
197 fname = PyBytes_FromStringAndSize(cur, flen);
197 if (!fname ||
198 if (!fname ||
198 PyDict_SetItem(dmap, fname, entry) == -1)
199 PyDict_SetItem(dmap, fname, entry) == -1)
199 goto quit;
200 goto quit;
200 }
201 }
201 cur += flen;
202 cur += flen;
202 Py_DECREF(fname);
203 Py_DECREF(fname);
203 Py_DECREF(entry);
204 Py_DECREF(entry);
204 fname = cname = entry = NULL;
205 fname = cname = entry = NULL;
205 }
206 }
206
207
207 ret = parents;
208 ret = parents;
208 Py_INCREF(ret);
209 Py_INCREF(ret);
209 quit:
210 quit:
210 Py_XDECREF(fname);
211 Py_XDECREF(fname);
211 Py_XDECREF(cname);
212 Py_XDECREF(cname);
212 Py_XDECREF(entry);
213 Py_XDECREF(entry);
213 Py_XDECREF(parents);
214 Py_XDECREF(parents);
214 return ret;
215 return ret;
215 }
216 }
216
217
217 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
218 {
219 {
219 PyObject *o = PyTuple_GET_ITEM(tuple, off);
220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
220 long val;
221 long val;
221
222
222 if (PyInt_Check(o))
223 if (PyInt_Check(o))
223 val = PyInt_AS_LONG(o);
224 val = PyInt_AS_LONG(o);
224 else if (PyLong_Check(o)) {
225 else if (PyLong_Check(o)) {
225 val = PyLong_AsLong(o);
226 val = PyLong_AsLong(o);
226 if (val == -1 && PyErr_Occurred())
227 if (val == -1 && PyErr_Occurred())
227 return -1;
228 return -1;
228 } else {
229 } else {
229 PyErr_SetString(PyExc_TypeError, "expected an int or long");
230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
230 return -1;
231 return -1;
231 }
232 }
232 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
233 PyErr_SetString(PyExc_OverflowError,
234 PyErr_SetString(PyExc_OverflowError,
234 "Python value to large to convert to uint32_t");
235 "Python value to large to convert to uint32_t");
235 return -1;
236 return -1;
236 }
237 }
237 *v = (uint32_t)val;
238 *v = (uint32_t)val;
238 return 0;
239 return 0;
239 }
240 }
240
241
241 static PyObject *dirstate_unset;
242 static PyObject *dirstate_unset;
242
243
243 /*
244 /*
244 * Efficiently pack a dirstate object into its on-disk format.
245 * Efficiently pack a dirstate object into its on-disk format.
245 */
246 */
246 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
247 {
248 {
248 PyObject *packobj = NULL;
249 PyObject *packobj = NULL;
249 PyObject *map, *copymap, *pl;
250 PyObject *map, *copymap, *pl;
250 Py_ssize_t nbytes, pos, l;
251 Py_ssize_t nbytes, pos, l;
251 PyObject *k, *v, *pn;
252 PyObject *k, *v, *pn;
252 char *p, *s;
253 char *p, *s;
253 double now;
254 double now;
254
255
255 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
256 &PyDict_Type, &map, &PyDict_Type, &copymap,
257 &PyDict_Type, &map, &PyDict_Type, &copymap,
257 &pl, &now))
258 &pl, &now))
258 return NULL;
259 return NULL;
259
260
260 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
261 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
262 return NULL;
263 return NULL;
263 }
264 }
264
265
265 /* Figure out how much we need to allocate. */
266 /* Figure out how much we need to allocate. */
266 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
267 PyObject *c;
268 PyObject *c;
268 if (!PyString_Check(k)) {
269 if (!PyString_Check(k)) {
269 PyErr_SetString(PyExc_TypeError, "expected string key");
270 PyErr_SetString(PyExc_TypeError, "expected string key");
270 goto bail;
271 goto bail;
271 }
272 }
272 nbytes += PyString_GET_SIZE(k) + 17;
273 nbytes += PyString_GET_SIZE(k) + 17;
273 c = PyDict_GetItem(copymap, k);
274 c = PyDict_GetItem(copymap, k);
274 if (c) {
275 if (c) {
275 if (!PyString_Check(c)) {
276 if (!PyString_Check(c)) {
276 PyErr_SetString(PyExc_TypeError,
277 PyErr_SetString(PyExc_TypeError,
277 "expected string key");
278 "expected string key");
278 goto bail;
279 goto bail;
279 }
280 }
280 nbytes += PyString_GET_SIZE(c) + 1;
281 nbytes += PyString_GET_SIZE(c) + 1;
281 }
282 }
282 }
283 }
283
284
284 packobj = PyString_FromStringAndSize(NULL, nbytes);
285 packobj = PyString_FromStringAndSize(NULL, nbytes);
285 if (packobj == NULL)
286 if (packobj == NULL)
286 goto bail;
287 goto bail;
287
288
288 p = PyString_AS_STRING(packobj);
289 p = PyString_AS_STRING(packobj);
289
290
290 pn = PySequence_ITEM(pl, 0);
291 pn = PySequence_ITEM(pl, 0);
291 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
292 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
293 goto bail;
294 goto bail;
294 }
295 }
295 memcpy(p, s, l);
296 memcpy(p, s, l);
296 p += 20;
297 p += 20;
297 pn = PySequence_ITEM(pl, 1);
298 pn = PySequence_ITEM(pl, 1);
298 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
299 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
300 goto bail;
301 goto bail;
301 }
302 }
302 memcpy(p, s, l);
303 memcpy(p, s, l);
303 p += 20;
304 p += 20;
304
305
305 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
306 uint32_t mode, size, mtime;
307 uint32_t mode, size, mtime;
307 Py_ssize_t len, l;
308 Py_ssize_t len, l;
308 PyObject *o;
309 PyObject *o;
309 char *s, *t;
310 char *s, *t;
310
311
311 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
312 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
313 goto bail;
314 goto bail;
314 }
315 }
315 o = PyTuple_GET_ITEM(v, 0);
316 o = PyTuple_GET_ITEM(v, 0);
316 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
317 PyErr_SetString(PyExc_TypeError, "expected one byte");
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
318 goto bail;
319 goto bail;
319 }
320 }
320 *p++ = *s;
321 *p++ = *s;
321 if (getintat(v, 1, &mode) == -1)
322 if (getintat(v, 1, &mode) == -1)
322 goto bail;
323 goto bail;
323 if (getintat(v, 2, &size) == -1)
324 if (getintat(v, 2, &size) == -1)
324 goto bail;
325 goto bail;
325 if (getintat(v, 3, &mtime) == -1)
326 if (getintat(v, 3, &mtime) == -1)
326 goto bail;
327 goto bail;
327 if (*s == 'n' && mtime == (uint32_t)now) {
328 if (*s == 'n' && mtime == (uint32_t)now) {
328 /* See dirstate.py:write for why we do this. */
329 /* See dirstate.py:write for why we do this. */
329 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
330 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
330 goto bail;
331 goto bail;
331 mode = 0, size = -1, mtime = -1;
332 mode = 0, size = -1, mtime = -1;
332 }
333 }
333 putbe32(mode, p);
334 putbe32(mode, p);
334 putbe32(size, p + 4);
335 putbe32(size, p + 4);
335 putbe32(mtime, p + 8);
336 putbe32(mtime, p + 8);
336 t = p + 12;
337 t = p + 12;
337 p += 16;
338 p += 16;
338 len = PyString_GET_SIZE(k);
339 len = PyString_GET_SIZE(k);
339 memcpy(p, PyString_AS_STRING(k), len);
340 memcpy(p, PyString_AS_STRING(k), len);
340 p += len;
341 p += len;
341 o = PyDict_GetItem(copymap, k);
342 o = PyDict_GetItem(copymap, k);
342 if (o) {
343 if (o) {
343 *p++ = '\0';
344 *p++ = '\0';
344 l = PyString_GET_SIZE(o);
345 l = PyString_GET_SIZE(o);
345 memcpy(p, PyString_AS_STRING(o), l);
346 memcpy(p, PyString_AS_STRING(o), l);
346 p += l;
347 p += l;
347 len += l + 1;
348 len += l + 1;
348 }
349 }
349 putbe32((uint32_t)len, t);
350 putbe32((uint32_t)len, t);
350 }
351 }
351
352
352 pos = p - PyString_AS_STRING(packobj);
353 pos = p - PyString_AS_STRING(packobj);
353 if (pos != nbytes) {
354 if (pos != nbytes) {
354 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
355 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
355 (long)pos, (long)nbytes);
356 (long)pos, (long)nbytes);
356 goto bail;
357 goto bail;
357 }
358 }
358
359
359 return packobj;
360 return packobj;
360 bail:
361 bail:
361 Py_XDECREF(packobj);
362 Py_XDECREF(packobj);
362 return NULL;
363 return NULL;
363 }
364 }
364
365
365 /*
366 /*
366 * A base-16 trie for fast node->rev mapping.
367 * A base-16 trie for fast node->rev mapping.
367 *
368 *
368 * Positive value is index of the next node in the trie
369 * Positive value is index of the next node in the trie
369 * Negative value is a leaf: -(rev + 1)
370 * Negative value is a leaf: -(rev + 1)
370 * Zero is empty
371 * Zero is empty
371 */
372 */
372 typedef struct {
373 typedef struct {
373 int children[16];
374 int children[16];
374 } nodetree;
375 } nodetree;
375
376
376 /*
377 /*
377 * This class has two behaviours.
378 * This class has two behaviours.
378 *
379 *
379 * When used in a list-like way (with integer keys), we decode an
380 * When used in a list-like way (with integer keys), we decode an
380 * entry in a RevlogNG index file on demand. Our last entry is a
381 * entry in a RevlogNG index file on demand. Our last entry is a
381 * sentinel, always a nullid. We have limited support for
382 * sentinel, always a nullid. We have limited support for
382 * integer-keyed insert and delete, only at elements right before the
383 * integer-keyed insert and delete, only at elements right before the
383 * sentinel.
384 * sentinel.
384 *
385 *
385 * With string keys, we lazily perform a reverse mapping from node to
386 * With string keys, we lazily perform a reverse mapping from node to
386 * rev, using a base-16 trie.
387 * rev, using a base-16 trie.
387 */
388 */
388 typedef struct {
389 typedef struct {
389 PyObject_HEAD
390 PyObject_HEAD
390 /* Type-specific fields go here. */
391 /* Type-specific fields go here. */
391 PyObject *data; /* raw bytes of index */
392 PyObject *data; /* raw bytes of index */
392 PyObject **cache; /* cached tuples */
393 PyObject **cache; /* cached tuples */
393 const char **offsets; /* populated on demand */
394 const char **offsets; /* populated on demand */
394 Py_ssize_t raw_length; /* original number of elements */
395 Py_ssize_t raw_length; /* original number of elements */
395 Py_ssize_t length; /* current number of elements */
396 Py_ssize_t length; /* current number of elements */
396 PyObject *added; /* populated on demand */
397 PyObject *added; /* populated on demand */
397 PyObject *headrevs; /* cache, invalidated on changes */
398 PyObject *headrevs; /* cache, invalidated on changes */
398 nodetree *nt; /* base-16 trie */
399 nodetree *nt; /* base-16 trie */
399 int ntlength; /* # nodes in use */
400 int ntlength; /* # nodes in use */
400 int ntcapacity; /* # nodes allocated */
401 int ntcapacity; /* # nodes allocated */
401 int ntdepth; /* maximum depth of tree */
402 int ntdepth; /* maximum depth of tree */
402 int ntsplits; /* # splits performed */
403 int ntsplits; /* # splits performed */
403 int ntrev; /* last rev scanned */
404 int ntrev; /* last rev scanned */
404 int ntlookups; /* # lookups */
405 int ntlookups; /* # lookups */
405 int ntmisses; /* # lookups that miss the cache */
406 int ntmisses; /* # lookups that miss the cache */
406 int inlined;
407 int inlined;
407 } indexObject;
408 } indexObject;
408
409
409 static Py_ssize_t index_length(const indexObject *self)
410 static Py_ssize_t index_length(const indexObject *self)
410 {
411 {
411 if (self->added == NULL)
412 if (self->added == NULL)
412 return self->length;
413 return self->length;
413 return self->length + PyList_GET_SIZE(self->added);
414 return self->length + PyList_GET_SIZE(self->added);
414 }
415 }
415
416
416 static PyObject *nullentry;
417 static PyObject *nullentry;
417 static const char nullid[20];
418 static const char nullid[20];
418
419
419 static long inline_scan(indexObject *self, const char **offsets);
420 static long inline_scan(indexObject *self, const char **offsets);
420
421
421 #if LONG_MAX == 0x7fffffffL
422 #if LONG_MAX == 0x7fffffffL
422 static char *tuple_format = "Kiiiiiis#";
423 static char *tuple_format = "Kiiiiiis#";
423 #else
424 #else
424 static char *tuple_format = "kiiiiiis#";
425 static char *tuple_format = "kiiiiiis#";
425 #endif
426 #endif
426
427
427 /* A RevlogNG v1 index entry is 64 bytes long. */
428 /* A RevlogNG v1 index entry is 64 bytes long. */
428 static const long v1_hdrsize = 64;
429 static const long v1_hdrsize = 64;
429
430
430 /*
431 /*
431 * Return a pointer to the beginning of a RevlogNG record.
432 * Return a pointer to the beginning of a RevlogNG record.
432 */
433 */
433 static const char *index_deref(indexObject *self, Py_ssize_t pos)
434 static const char *index_deref(indexObject *self, Py_ssize_t pos)
434 {
435 {
435 if (self->inlined && pos > 0) {
436 if (self->inlined && pos > 0) {
436 if (self->offsets == NULL) {
437 if (self->offsets == NULL) {
437 self->offsets = malloc(self->raw_length *
438 self->offsets = malloc(self->raw_length *
438 sizeof(*self->offsets));
439 sizeof(*self->offsets));
439 if (self->offsets == NULL)
440 if (self->offsets == NULL)
440 return (const char *)PyErr_NoMemory();
441 return (const char *)PyErr_NoMemory();
441 inline_scan(self, self->offsets);
442 inline_scan(self, self->offsets);
442 }
443 }
443 return self->offsets[pos];
444 return self->offsets[pos];
444 }
445 }
445
446
446 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
447 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
447 }
448 }
448
449
449 /*
450 /*
450 * RevlogNG format (all in big endian, data may be inlined):
451 * RevlogNG format (all in big endian, data may be inlined):
451 * 6 bytes: offset
452 * 6 bytes: offset
452 * 2 bytes: flags
453 * 2 bytes: flags
453 * 4 bytes: compressed length
454 * 4 bytes: compressed length
454 * 4 bytes: uncompressed length
455 * 4 bytes: uncompressed length
455 * 4 bytes: base revision
456 * 4 bytes: base revision
456 * 4 bytes: link revision
457 * 4 bytes: link revision
457 * 4 bytes: parent 1 revision
458 * 4 bytes: parent 1 revision
458 * 4 bytes: parent 2 revision
459 * 4 bytes: parent 2 revision
459 * 32 bytes: nodeid (only 20 bytes used)
460 * 32 bytes: nodeid (only 20 bytes used)
460 */
461 */
461 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
462 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
462 {
463 {
463 uint64_t offset_flags;
464 uint64_t offset_flags;
464 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
465 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
465 const char *c_node_id;
466 const char *c_node_id;
466 const char *data;
467 const char *data;
467 Py_ssize_t length = index_length(self);
468 Py_ssize_t length = index_length(self);
468 PyObject *entry;
469 PyObject *entry;
469
470
470 if (pos < 0)
471 if (pos < 0)
471 pos += length;
472 pos += length;
472
473
473 if (pos < 0 || pos >= length) {
474 if (pos < 0 || pos >= length) {
474 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
475 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
475 return NULL;
476 return NULL;
476 }
477 }
477
478
478 if (pos == length - 1) {
479 if (pos == length - 1) {
479 Py_INCREF(nullentry);
480 Py_INCREF(nullentry);
480 return nullentry;
481 return nullentry;
481 }
482 }
482
483
483 if (pos >= self->length - 1) {
484 if (pos >= self->length - 1) {
484 PyObject *obj;
485 PyObject *obj;
485 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
486 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
486 Py_INCREF(obj);
487 Py_INCREF(obj);
487 return obj;
488 return obj;
488 }
489 }
489
490
490 if (self->cache) {
491 if (self->cache) {
491 if (self->cache[pos]) {
492 if (self->cache[pos]) {
492 Py_INCREF(self->cache[pos]);
493 Py_INCREF(self->cache[pos]);
493 return self->cache[pos];
494 return self->cache[pos];
494 }
495 }
495 } else {
496 } else {
496 self->cache = calloc(self->raw_length, sizeof(PyObject *));
497 self->cache = calloc(self->raw_length, sizeof(PyObject *));
497 if (self->cache == NULL)
498 if (self->cache == NULL)
498 return PyErr_NoMemory();
499 return PyErr_NoMemory();
499 }
500 }
500
501
501 data = index_deref(self, pos);
502 data = index_deref(self, pos);
502 if (data == NULL)
503 if (data == NULL)
503 return NULL;
504 return NULL;
504
505
505 offset_flags = getbe32(data + 4);
506 offset_flags = getbe32(data + 4);
506 if (pos == 0) /* mask out version number for the first entry */
507 if (pos == 0) /* mask out version number for the first entry */
507 offset_flags &= 0xFFFF;
508 offset_flags &= 0xFFFF;
508 else {
509 else {
509 uint32_t offset_high = getbe32(data);
510 uint32_t offset_high = getbe32(data);
510 offset_flags |= ((uint64_t)offset_high) << 32;
511 offset_flags |= ((uint64_t)offset_high) << 32;
511 }
512 }
512
513
513 comp_len = getbe32(data + 8);
514 comp_len = getbe32(data + 8);
514 uncomp_len = getbe32(data + 12);
515 uncomp_len = getbe32(data + 12);
515 base_rev = getbe32(data + 16);
516 base_rev = getbe32(data + 16);
516 link_rev = getbe32(data + 20);
517 link_rev = getbe32(data + 20);
517 parent_1 = getbe32(data + 24);
518 parent_1 = getbe32(data + 24);
518 parent_2 = getbe32(data + 28);
519 parent_2 = getbe32(data + 28);
519 c_node_id = data + 32;
520 c_node_id = data + 32;
520
521
521 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
522 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
522 uncomp_len, base_rev, link_rev,
523 uncomp_len, base_rev, link_rev,
523 parent_1, parent_2, c_node_id, 20);
524 parent_1, parent_2, c_node_id, 20);
524
525
525 if (entry)
526 if (entry)
526 PyObject_GC_UnTrack(entry);
527 PyObject_GC_UnTrack(entry);
527
528
528 self->cache[pos] = entry;
529 self->cache[pos] = entry;
529 Py_INCREF(entry);
530 Py_INCREF(entry);
530
531
531 return entry;
532 return entry;
532 }
533 }
533
534
534 /*
535 /*
535 * Return the 20-byte SHA of the node corresponding to the given rev.
536 * Return the 20-byte SHA of the node corresponding to the given rev.
536 */
537 */
537 static const char *index_node(indexObject *self, Py_ssize_t pos)
538 static const char *index_node(indexObject *self, Py_ssize_t pos)
538 {
539 {
539 Py_ssize_t length = index_length(self);
540 Py_ssize_t length = index_length(self);
540 const char *data;
541 const char *data;
541
542
542 if (pos == length - 1 || pos == INT_MAX)
543 if (pos == length - 1 || pos == INT_MAX)
543 return nullid;
544 return nullid;
544
545
545 if (pos >= length)
546 if (pos >= length)
546 return NULL;
547 return NULL;
547
548
548 if (pos >= self->length - 1) {
549 if (pos >= self->length - 1) {
549 PyObject *tuple, *str;
550 PyObject *tuple, *str;
550 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
551 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
551 str = PyTuple_GetItem(tuple, 7);
552 str = PyTuple_GetItem(tuple, 7);
552 return str ? PyString_AS_STRING(str) : NULL;
553 return str ? PyString_AS_STRING(str) : NULL;
553 }
554 }
554
555
555 data = index_deref(self, pos);
556 data = index_deref(self, pos);
556 return data ? data + 32 : NULL;
557 return data ? data + 32 : NULL;
557 }
558 }
558
559
559 static int nt_insert(indexObject *self, const char *node, int rev);
560 static int nt_insert(indexObject *self, const char *node, int rev);
560
561
561 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
562 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
562 {
563 {
563 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
564 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
564 return -1;
565 return -1;
565 if (*nodelen == 20)
566 if (*nodelen == 20)
566 return 0;
567 return 0;
567 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
568 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
568 return -1;
569 return -1;
569 }
570 }
570
571
571 static PyObject *index_insert(indexObject *self, PyObject *args)
572 static PyObject *index_insert(indexObject *self, PyObject *args)
572 {
573 {
573 PyObject *obj;
574 PyObject *obj;
574 char *node;
575 char *node;
575 long offset;
576 long offset;
576 Py_ssize_t len, nodelen;
577 Py_ssize_t len, nodelen;
577
578
578 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
579 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
579 return NULL;
580 return NULL;
580
581
581 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
582 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
582 PyErr_SetString(PyExc_TypeError, "8-tuple required");
583 PyErr_SetString(PyExc_TypeError, "8-tuple required");
583 return NULL;
584 return NULL;
584 }
585 }
585
586
586 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
587 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
587 return NULL;
588 return NULL;
588
589
589 len = index_length(self);
590 len = index_length(self);
590
591
591 if (offset < 0)
592 if (offset < 0)
592 offset += len;
593 offset += len;
593
594
594 if (offset != len - 1) {
595 if (offset != len - 1) {
595 PyErr_SetString(PyExc_IndexError,
596 PyErr_SetString(PyExc_IndexError,
596 "insert only supported at index -1");
597 "insert only supported at index -1");
597 return NULL;
598 return NULL;
598 }
599 }
599
600
600 if (offset > INT_MAX) {
601 if (offset > INT_MAX) {
601 PyErr_SetString(PyExc_ValueError,
602 PyErr_SetString(PyExc_ValueError,
602 "currently only 2**31 revs supported");
603 "currently only 2**31 revs supported");
603 return NULL;
604 return NULL;
604 }
605 }
605
606
606 if (self->added == NULL) {
607 if (self->added == NULL) {
607 self->added = PyList_New(0);
608 self->added = PyList_New(0);
608 if (self->added == NULL)
609 if (self->added == NULL)
609 return NULL;
610 return NULL;
610 }
611 }
611
612
612 if (PyList_Append(self->added, obj) == -1)
613 if (PyList_Append(self->added, obj) == -1)
613 return NULL;
614 return NULL;
614
615
615 if (self->nt)
616 if (self->nt)
616 nt_insert(self, node, (int)offset);
617 nt_insert(self, node, (int)offset);
617
618
618 Py_CLEAR(self->headrevs);
619 Py_CLEAR(self->headrevs);
619 Py_RETURN_NONE;
620 Py_RETURN_NONE;
620 }
621 }
621
622
622 static void _index_clearcaches(indexObject *self)
623 static void _index_clearcaches(indexObject *self)
623 {
624 {
624 if (self->cache) {
625 if (self->cache) {
625 Py_ssize_t i;
626 Py_ssize_t i;
626
627
627 for (i = 0; i < self->raw_length; i++)
628 for (i = 0; i < self->raw_length; i++)
628 Py_CLEAR(self->cache[i]);
629 Py_CLEAR(self->cache[i]);
629 free(self->cache);
630 free(self->cache);
630 self->cache = NULL;
631 self->cache = NULL;
631 }
632 }
632 if (self->offsets) {
633 if (self->offsets) {
633 free(self->offsets);
634 free(self->offsets);
634 self->offsets = NULL;
635 self->offsets = NULL;
635 }
636 }
636 if (self->nt) {
637 if (self->nt) {
637 free(self->nt);
638 free(self->nt);
638 self->nt = NULL;
639 self->nt = NULL;
639 }
640 }
640 Py_CLEAR(self->headrevs);
641 Py_CLEAR(self->headrevs);
641 }
642 }
642
643
643 static PyObject *index_clearcaches(indexObject *self)
644 static PyObject *index_clearcaches(indexObject *self)
644 {
645 {
645 _index_clearcaches(self);
646 _index_clearcaches(self);
646 self->ntlength = self->ntcapacity = 0;
647 self->ntlength = self->ntcapacity = 0;
647 self->ntdepth = self->ntsplits = 0;
648 self->ntdepth = self->ntsplits = 0;
648 self->ntrev = -1;
649 self->ntrev = -1;
649 self->ntlookups = self->ntmisses = 0;
650 self->ntlookups = self->ntmisses = 0;
650 Py_RETURN_NONE;
651 Py_RETURN_NONE;
651 }
652 }
652
653
653 static PyObject *index_stats(indexObject *self)
654 static PyObject *index_stats(indexObject *self)
654 {
655 {
655 PyObject *obj = PyDict_New();
656 PyObject *obj = PyDict_New();
656
657
657 if (obj == NULL)
658 if (obj == NULL)
658 return NULL;
659 return NULL;
659
660
660 #define istat(__n, __d) \
661 #define istat(__n, __d) \
661 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
662 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
662 goto bail;
663 goto bail;
663
664
664 if (self->added) {
665 if (self->added) {
665 Py_ssize_t len = PyList_GET_SIZE(self->added);
666 Py_ssize_t len = PyList_GET_SIZE(self->added);
666 if (PyDict_SetItemString(obj, "index entries added",
667 if (PyDict_SetItemString(obj, "index entries added",
667 PyInt_FromSsize_t(len)) == -1)
668 PyInt_FromSsize_t(len)) == -1)
668 goto bail;
669 goto bail;
669 }
670 }
670
671
671 if (self->raw_length != self->length - 1)
672 if (self->raw_length != self->length - 1)
672 istat(raw_length, "revs on disk");
673 istat(raw_length, "revs on disk");
673 istat(length, "revs in memory");
674 istat(length, "revs in memory");
674 istat(ntcapacity, "node trie capacity");
675 istat(ntcapacity, "node trie capacity");
675 istat(ntdepth, "node trie depth");
676 istat(ntdepth, "node trie depth");
676 istat(ntlength, "node trie count");
677 istat(ntlength, "node trie count");
677 istat(ntlookups, "node trie lookups");
678 istat(ntlookups, "node trie lookups");
678 istat(ntmisses, "node trie misses");
679 istat(ntmisses, "node trie misses");
679 istat(ntrev, "node trie last rev scanned");
680 istat(ntrev, "node trie last rev scanned");
680 istat(ntsplits, "node trie splits");
681 istat(ntsplits, "node trie splits");
681
682
682 #undef istat
683 #undef istat
683
684
684 return obj;
685 return obj;
685
686
686 bail:
687 bail:
687 Py_XDECREF(obj);
688 Py_XDECREF(obj);
688 return NULL;
689 return NULL;
689 }
690 }
690
691
691 /*
692 /*
692 * When we cache a list, we want to be sure the caller can't mutate
693 * When we cache a list, we want to be sure the caller can't mutate
693 * the cached copy.
694 * the cached copy.
694 */
695 */
695 static PyObject *list_copy(PyObject *list)
696 static PyObject *list_copy(PyObject *list)
696 {
697 {
697 Py_ssize_t len = PyList_GET_SIZE(list);
698 Py_ssize_t len = PyList_GET_SIZE(list);
698 PyObject *newlist = PyList_New(len);
699 PyObject *newlist = PyList_New(len);
699 Py_ssize_t i;
700 Py_ssize_t i;
700
701
701 if (newlist == NULL)
702 if (newlist == NULL)
702 return NULL;
703 return NULL;
703
704
704 for (i = 0; i < len; i++) {
705 for (i = 0; i < len; i++) {
705 PyObject *obj = PyList_GET_ITEM(list, i);
706 PyObject *obj = PyList_GET_ITEM(list, i);
706 Py_INCREF(obj);
707 Py_INCREF(obj);
707 PyList_SET_ITEM(newlist, i, obj);
708 PyList_SET_ITEM(newlist, i, obj);
708 }
709 }
709
710
710 return newlist;
711 return newlist;
711 }
712 }
712
713
713 static PyObject *index_headrevs(indexObject *self)
714 static PyObject *index_headrevs(indexObject *self)
714 {
715 {
715 Py_ssize_t i, len, addlen;
716 Py_ssize_t i, len, addlen;
716 char *nothead = NULL;
717 char *nothead = NULL;
717 PyObject *heads;
718 PyObject *heads;
718
719
719 if (self->headrevs)
720 if (self->headrevs)
720 return list_copy(self->headrevs);
721 return list_copy(self->headrevs);
721
722
722 len = index_length(self) - 1;
723 len = index_length(self) - 1;
723 heads = PyList_New(0);
724 heads = PyList_New(0);
724 if (heads == NULL)
725 if (heads == NULL)
725 goto bail;
726 goto bail;
726 if (len == 0) {
727 if (len == 0) {
727 PyObject *nullid = PyInt_FromLong(-1);
728 PyObject *nullid = PyInt_FromLong(-1);
728 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
729 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
729 Py_XDECREF(nullid);
730 Py_XDECREF(nullid);
730 goto bail;
731 goto bail;
731 }
732 }
732 goto done;
733 goto done;
733 }
734 }
734
735
735 nothead = calloc(len, 1);
736 nothead = calloc(len, 1);
736 if (nothead == NULL)
737 if (nothead == NULL)
737 goto bail;
738 goto bail;
738
739
739 for (i = 0; i < self->raw_length; i++) {
740 for (i = 0; i < self->raw_length; i++) {
740 const char *data = index_deref(self, i);
741 const char *data = index_deref(self, i);
741 int parent_1 = getbe32(data + 24);
742 int parent_1 = getbe32(data + 24);
742 int parent_2 = getbe32(data + 28);
743 int parent_2 = getbe32(data + 28);
743 if (parent_1 >= 0)
744 if (parent_1 >= 0)
744 nothead[parent_1] = 1;
745 nothead[parent_1] = 1;
745 if (parent_2 >= 0)
746 if (parent_2 >= 0)
746 nothead[parent_2] = 1;
747 nothead[parent_2] = 1;
747 }
748 }
748
749
749 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
750 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
750
751
751 for (i = 0; i < addlen; i++) {
752 for (i = 0; i < addlen; i++) {
752 PyObject *rev = PyList_GET_ITEM(self->added, i);
753 PyObject *rev = PyList_GET_ITEM(self->added, i);
753 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
754 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
754 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
755 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
755 long parent_1, parent_2;
756 long parent_1, parent_2;
756
757
757 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
758 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
758 PyErr_SetString(PyExc_TypeError,
759 PyErr_SetString(PyExc_TypeError,
759 "revlog parents are invalid");
760 "revlog parents are invalid");
760 goto bail;
761 goto bail;
761 }
762 }
762 parent_1 = PyInt_AS_LONG(p1);
763 parent_1 = PyInt_AS_LONG(p1);
763 parent_2 = PyInt_AS_LONG(p2);
764 parent_2 = PyInt_AS_LONG(p2);
764 if (parent_1 >= 0)
765 if (parent_1 >= 0)
765 nothead[parent_1] = 1;
766 nothead[parent_1] = 1;
766 if (parent_2 >= 0)
767 if (parent_2 >= 0)
767 nothead[parent_2] = 1;
768 nothead[parent_2] = 1;
768 }
769 }
769
770
770 for (i = 0; i < len; i++) {
771 for (i = 0; i < len; i++) {
771 PyObject *head;
772 PyObject *head;
772
773
773 if (nothead[i])
774 if (nothead[i])
774 continue;
775 continue;
775 head = PyInt_FromLong(i);
776 head = PyInt_FromLong(i);
776 if (head == NULL || PyList_Append(heads, head) == -1) {
777 if (head == NULL || PyList_Append(heads, head) == -1) {
777 Py_XDECREF(head);
778 Py_XDECREF(head);
778 goto bail;
779 goto bail;
779 }
780 }
780 }
781 }
781
782
782 done:
783 done:
783 self->headrevs = heads;
784 self->headrevs = heads;
784 free(nothead);
785 free(nothead);
785 return list_copy(self->headrevs);
786 return list_copy(self->headrevs);
786 bail:
787 bail:
787 Py_XDECREF(heads);
788 Py_XDECREF(heads);
788 free(nothead);
789 free(nothead);
789 return NULL;
790 return NULL;
790 }
791 }
791
792
792 static inline int nt_level(const char *node, Py_ssize_t level)
793 static inline int nt_level(const char *node, Py_ssize_t level)
793 {
794 {
794 int v = node[level>>1];
795 int v = node[level>>1];
795 if (!(level & 1))
796 if (!(level & 1))
796 v >>= 4;
797 v >>= 4;
797 return v & 0xf;
798 return v & 0xf;
798 }
799 }
799
800
800 /*
801 /*
801 * Return values:
802 * Return values:
802 *
803 *
803 * -4: match is ambiguous (multiple candidates)
804 * -4: match is ambiguous (multiple candidates)
804 * -2: not found
805 * -2: not found
805 * rest: valid rev
806 * rest: valid rev
806 */
807 */
807 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
808 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
808 int hex)
809 int hex)
809 {
810 {
810 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
811 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
811 int level, maxlevel, off;
812 int level, maxlevel, off;
812
813
813 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
814 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
814 return -1;
815 return -1;
815
816
816 if (self->nt == NULL)
817 if (self->nt == NULL)
817 return -2;
818 return -2;
818
819
819 if (hex)
820 if (hex)
820 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
821 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
821 else
822 else
822 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
823 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
823
824
824 for (level = off = 0; level < maxlevel; level++) {
825 for (level = off = 0; level < maxlevel; level++) {
825 int k = getnybble(node, level);
826 int k = getnybble(node, level);
826 nodetree *n = &self->nt[off];
827 nodetree *n = &self->nt[off];
827 int v = n->children[k];
828 int v = n->children[k];
828
829
829 if (v < 0) {
830 if (v < 0) {
830 const char *n;
831 const char *n;
831 Py_ssize_t i;
832 Py_ssize_t i;
832
833
833 v = -v - 1;
834 v = -v - 1;
834 n = index_node(self, v);
835 n = index_node(self, v);
835 if (n == NULL)
836 if (n == NULL)
836 return -2;
837 return -2;
837 for (i = level; i < maxlevel; i++)
838 for (i = level; i < maxlevel; i++)
838 if (getnybble(node, i) != nt_level(n, i))
839 if (getnybble(node, i) != nt_level(n, i))
839 return -2;
840 return -2;
840 return v;
841 return v;
841 }
842 }
842 if (v == 0)
843 if (v == 0)
843 return -2;
844 return -2;
844 off = v;
845 off = v;
845 }
846 }
846 /* multiple matches against an ambiguous prefix */
847 /* multiple matches against an ambiguous prefix */
847 return -4;
848 return -4;
848 }
849 }
849
850
850 static int nt_new(indexObject *self)
851 static int nt_new(indexObject *self)
851 {
852 {
852 if (self->ntlength == self->ntcapacity) {
853 if (self->ntlength == self->ntcapacity) {
853 self->ntcapacity *= 2;
854 self->ntcapacity *= 2;
854 self->nt = realloc(self->nt,
855 self->nt = realloc(self->nt,
855 self->ntcapacity * sizeof(nodetree));
856 self->ntcapacity * sizeof(nodetree));
856 if (self->nt == NULL) {
857 if (self->nt == NULL) {
857 PyErr_SetString(PyExc_MemoryError, "out of memory");
858 PyErr_SetString(PyExc_MemoryError, "out of memory");
858 return -1;
859 return -1;
859 }
860 }
860 memset(&self->nt[self->ntlength], 0,
861 memset(&self->nt[self->ntlength], 0,
861 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
862 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
862 }
863 }
863 return self->ntlength++;
864 return self->ntlength++;
864 }
865 }
865
866
866 static int nt_insert(indexObject *self, const char *node, int rev)
867 static int nt_insert(indexObject *self, const char *node, int rev)
867 {
868 {
868 int level = 0;
869 int level = 0;
869 int off = 0;
870 int off = 0;
870
871
871 while (level < 40) {
872 while (level < 40) {
872 int k = nt_level(node, level);
873 int k = nt_level(node, level);
873 nodetree *n;
874 nodetree *n;
874 int v;
875 int v;
875
876
876 n = &self->nt[off];
877 n = &self->nt[off];
877 v = n->children[k];
878 v = n->children[k];
878
879
879 if (v == 0) {
880 if (v == 0) {
880 n->children[k] = -rev - 1;
881 n->children[k] = -rev - 1;
881 return 0;
882 return 0;
882 }
883 }
883 if (v < 0) {
884 if (v < 0) {
884 const char *oldnode = index_node(self, -v - 1);
885 const char *oldnode = index_node(self, -v - 1);
885 int noff;
886 int noff;
886
887
887 if (!oldnode || !memcmp(oldnode, node, 20)) {
888 if (!oldnode || !memcmp(oldnode, node, 20)) {
888 n->children[k] = -rev - 1;
889 n->children[k] = -rev - 1;
889 return 0;
890 return 0;
890 }
891 }
891 noff = nt_new(self);
892 noff = nt_new(self);
892 if (noff == -1)
893 if (noff == -1)
893 return -1;
894 return -1;
894 /* self->nt may have been changed by realloc */
895 /* self->nt may have been changed by realloc */
895 self->nt[off].children[k] = noff;
896 self->nt[off].children[k] = noff;
896 off = noff;
897 off = noff;
897 n = &self->nt[off];
898 n = &self->nt[off];
898 n->children[nt_level(oldnode, ++level)] = v;
899 n->children[nt_level(oldnode, ++level)] = v;
899 if (level > self->ntdepth)
900 if (level > self->ntdepth)
900 self->ntdepth = level;
901 self->ntdepth = level;
901 self->ntsplits += 1;
902 self->ntsplits += 1;
902 } else {
903 } else {
903 level += 1;
904 level += 1;
904 off = v;
905 off = v;
905 }
906 }
906 }
907 }
907
908
908 return -1;
909 return -1;
909 }
910 }
910
911
911 static int nt_init(indexObject *self)
912 static int nt_init(indexObject *self)
912 {
913 {
913 if (self->nt == NULL) {
914 if (self->nt == NULL) {
914 self->ntcapacity = self->raw_length < 4
915 self->ntcapacity = self->raw_length < 4
915 ? 4 : self->raw_length / 2;
916 ? 4 : self->raw_length / 2;
916 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
917 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
917 if (self->nt == NULL) {
918 if (self->nt == NULL) {
918 PyErr_NoMemory();
919 PyErr_NoMemory();
919 return -1;
920 return -1;
920 }
921 }
921 self->ntlength = 1;
922 self->ntlength = 1;
922 self->ntrev = (int)index_length(self) - 1;
923 self->ntrev = (int)index_length(self) - 1;
923 self->ntlookups = 1;
924 self->ntlookups = 1;
924 self->ntmisses = 0;
925 self->ntmisses = 0;
925 if (nt_insert(self, nullid, INT_MAX) == -1)
926 if (nt_insert(self, nullid, INT_MAX) == -1)
926 return -1;
927 return -1;
927 }
928 }
928 return 0;
929 return 0;
929 }
930 }
930
931
931 /*
932 /*
932 * Return values:
933 * Return values:
933 *
934 *
934 * -3: error (exception set)
935 * -3: error (exception set)
935 * -2: not found (no exception set)
936 * -2: not found (no exception set)
936 * rest: valid rev
937 * rest: valid rev
937 */
938 */
938 static int index_find_node(indexObject *self,
939 static int index_find_node(indexObject *self,
939 const char *node, Py_ssize_t nodelen)
940 const char *node, Py_ssize_t nodelen)
940 {
941 {
941 int rev;
942 int rev;
942
943
943 self->ntlookups++;
944 self->ntlookups++;
944 rev = nt_find(self, node, nodelen, 0);
945 rev = nt_find(self, node, nodelen, 0);
945 if (rev >= -1)
946 if (rev >= -1)
946 return rev;
947 return rev;
947
948
948 if (nt_init(self) == -1)
949 if (nt_init(self) == -1)
949 return -3;
950 return -3;
950
951
951 /*
952 /*
952 * For the first handful of lookups, we scan the entire index,
953 * For the first handful of lookups, we scan the entire index,
953 * and cache only the matching nodes. This optimizes for cases
954 * and cache only the matching nodes. This optimizes for cases
954 * like "hg tip", where only a few nodes are accessed.
955 * like "hg tip", where only a few nodes are accessed.
955 *
956 *
956 * After that, we cache every node we visit, using a single
957 * After that, we cache every node we visit, using a single
957 * scan amortized over multiple lookups. This gives the best
958 * scan amortized over multiple lookups. This gives the best
958 * bulk performance, e.g. for "hg log".
959 * bulk performance, e.g. for "hg log".
959 */
960 */
960 if (self->ntmisses++ < 4) {
961 if (self->ntmisses++ < 4) {
961 for (rev = self->ntrev - 1; rev >= 0; rev--) {
962 for (rev = self->ntrev - 1; rev >= 0; rev--) {
962 const char *n = index_node(self, rev);
963 const char *n = index_node(self, rev);
963 if (n == NULL)
964 if (n == NULL)
964 return -2;
965 return -2;
965 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
966 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
966 if (nt_insert(self, n, rev) == -1)
967 if (nt_insert(self, n, rev) == -1)
967 return -3;
968 return -3;
968 break;
969 break;
969 }
970 }
970 }
971 }
971 } else {
972 } else {
972 for (rev = self->ntrev - 1; rev >= 0; rev--) {
973 for (rev = self->ntrev - 1; rev >= 0; rev--) {
973 const char *n = index_node(self, rev);
974 const char *n = index_node(self, rev);
974 if (n == NULL) {
975 if (n == NULL) {
975 self->ntrev = rev + 1;
976 self->ntrev = rev + 1;
976 return -2;
977 return -2;
977 }
978 }
978 if (nt_insert(self, n, rev) == -1) {
979 if (nt_insert(self, n, rev) == -1) {
979 self->ntrev = rev + 1;
980 self->ntrev = rev + 1;
980 return -3;
981 return -3;
981 }
982 }
982 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
983 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
983 break;
984 break;
984 }
985 }
985 }
986 }
986 self->ntrev = rev;
987 self->ntrev = rev;
987 }
988 }
988
989
989 if (rev >= 0)
990 if (rev >= 0)
990 return rev;
991 return rev;
991 return -2;
992 return -2;
992 }
993 }
993
994
994 static PyObject *raise_revlog_error(void)
995 static PyObject *raise_revlog_error(void)
995 {
996 {
996 static PyObject *errclass;
997 static PyObject *errclass;
997 PyObject *mod = NULL, *errobj;
998 PyObject *mod = NULL, *errobj;
998
999
999 if (errclass == NULL) {
1000 if (errclass == NULL) {
1000 PyObject *dict;
1001 PyObject *dict;
1001
1002
1002 mod = PyImport_ImportModule("mercurial.error");
1003 mod = PyImport_ImportModule("mercurial.error");
1003 if (mod == NULL)
1004 if (mod == NULL)
1004 goto classfail;
1005 goto classfail;
1005
1006
1006 dict = PyModule_GetDict(mod);
1007 dict = PyModule_GetDict(mod);
1007 if (dict == NULL)
1008 if (dict == NULL)
1008 goto classfail;
1009 goto classfail;
1009
1010
1010 errclass = PyDict_GetItemString(dict, "RevlogError");
1011 errclass = PyDict_GetItemString(dict, "RevlogError");
1011 if (errclass == NULL) {
1012 if (errclass == NULL) {
1012 PyErr_SetString(PyExc_SystemError,
1013 PyErr_SetString(PyExc_SystemError,
1013 "could not find RevlogError");
1014 "could not find RevlogError");
1014 goto classfail;
1015 goto classfail;
1015 }
1016 }
1016 Py_INCREF(errclass);
1017 Py_INCREF(errclass);
1017 }
1018 }
1018
1019
1019 errobj = PyObject_CallFunction(errclass, NULL);
1020 errobj = PyObject_CallFunction(errclass, NULL);
1020 if (errobj == NULL)
1021 if (errobj == NULL)
1021 return NULL;
1022 return NULL;
1022 PyErr_SetObject(errclass, errobj);
1023 PyErr_SetObject(errclass, errobj);
1023 return errobj;
1024 return errobj;
1024
1025
1025 classfail:
1026 classfail:
1026 Py_XDECREF(mod);
1027 Py_XDECREF(mod);
1027 return NULL;
1028 return NULL;
1028 }
1029 }
1029
1030
1030 static PyObject *index_getitem(indexObject *self, PyObject *value)
1031 static PyObject *index_getitem(indexObject *self, PyObject *value)
1031 {
1032 {
1032 char *node;
1033 char *node;
1033 Py_ssize_t nodelen;
1034 Py_ssize_t nodelen;
1034 int rev;
1035 int rev;
1035
1036
1036 if (PyInt_Check(value))
1037 if (PyInt_Check(value))
1037 return index_get(self, PyInt_AS_LONG(value));
1038 return index_get(self, PyInt_AS_LONG(value));
1038
1039
1039 if (node_check(value, &node, &nodelen) == -1)
1040 if (node_check(value, &node, &nodelen) == -1)
1040 return NULL;
1041 return NULL;
1041 rev = index_find_node(self, node, nodelen);
1042 rev = index_find_node(self, node, nodelen);
1042 if (rev >= -1)
1043 if (rev >= -1)
1043 return PyInt_FromLong(rev);
1044 return PyInt_FromLong(rev);
1044 if (rev == -2)
1045 if (rev == -2)
1045 raise_revlog_error();
1046 raise_revlog_error();
1046 return NULL;
1047 return NULL;
1047 }
1048 }
1048
1049
1049 static int nt_partialmatch(indexObject *self, const char *node,
1050 static int nt_partialmatch(indexObject *self, const char *node,
1050 Py_ssize_t nodelen)
1051 Py_ssize_t nodelen)
1051 {
1052 {
1052 int rev;
1053 int rev;
1053
1054
1054 if (nt_init(self) == -1)
1055 if (nt_init(self) == -1)
1055 return -3;
1056 return -3;
1056
1057
1057 if (self->ntrev > 0) {
1058 if (self->ntrev > 0) {
1058 /* ensure that the radix tree is fully populated */
1059 /* ensure that the radix tree is fully populated */
1059 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1060 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1060 const char *n = index_node(self, rev);
1061 const char *n = index_node(self, rev);
1061 if (n == NULL)
1062 if (n == NULL)
1062 return -2;
1063 return -2;
1063 if (nt_insert(self, n, rev) == -1)
1064 if (nt_insert(self, n, rev) == -1)
1064 return -3;
1065 return -3;
1065 }
1066 }
1066 self->ntrev = rev;
1067 self->ntrev = rev;
1067 }
1068 }
1068
1069
1069 return nt_find(self, node, nodelen, 1);
1070 return nt_find(self, node, nodelen, 1);
1070 }
1071 }
1071
1072
1072 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1073 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1073 {
1074 {
1074 const char *fullnode;
1075 const char *fullnode;
1075 int nodelen;
1076 int nodelen;
1076 char *node;
1077 char *node;
1077 int rev, i;
1078 int rev, i;
1078
1079
1079 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1080 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1080 return NULL;
1081 return NULL;
1081
1082
1082 if (nodelen < 4) {
1083 if (nodelen < 4) {
1083 PyErr_SetString(PyExc_ValueError, "key too short");
1084 PyErr_SetString(PyExc_ValueError, "key too short");
1084 return NULL;
1085 return NULL;
1085 }
1086 }
1086
1087
1087 if (nodelen > 40) {
1088 if (nodelen > 40) {
1088 PyErr_SetString(PyExc_ValueError, "key too long");
1089 PyErr_SetString(PyExc_ValueError, "key too long");
1089 return NULL;
1090 return NULL;
1090 }
1091 }
1091
1092
1092 for (i = 0; i < nodelen; i++)
1093 for (i = 0; i < nodelen; i++)
1093 hexdigit(node, i);
1094 hexdigit(node, i);
1094 if (PyErr_Occurred()) {
1095 if (PyErr_Occurred()) {
1095 /* input contains non-hex characters */
1096 /* input contains non-hex characters */
1096 PyErr_Clear();
1097 PyErr_Clear();
1097 Py_RETURN_NONE;
1098 Py_RETURN_NONE;
1098 }
1099 }
1099
1100
1100 rev = nt_partialmatch(self, node, nodelen);
1101 rev = nt_partialmatch(self, node, nodelen);
1101
1102
1102 switch (rev) {
1103 switch (rev) {
1103 case -4:
1104 case -4:
1104 raise_revlog_error();
1105 raise_revlog_error();
1105 case -3:
1106 case -3:
1106 return NULL;
1107 return NULL;
1107 case -2:
1108 case -2:
1108 Py_RETURN_NONE;
1109 Py_RETURN_NONE;
1109 case -1:
1110 case -1:
1110 return PyString_FromStringAndSize(nullid, 20);
1111 return PyString_FromStringAndSize(nullid, 20);
1111 }
1112 }
1112
1113
1113 fullnode = index_node(self, rev);
1114 fullnode = index_node(self, rev);
1114 if (fullnode == NULL) {
1115 if (fullnode == NULL) {
1115 PyErr_Format(PyExc_IndexError,
1116 PyErr_Format(PyExc_IndexError,
1116 "could not access rev %d", rev);
1117 "could not access rev %d", rev);
1117 return NULL;
1118 return NULL;
1118 }
1119 }
1119 return PyString_FromStringAndSize(fullnode, 20);
1120 return PyString_FromStringAndSize(fullnode, 20);
1120 }
1121 }
1121
1122
1122 static PyObject *index_m_get(indexObject *self, PyObject *args)
1123 static PyObject *index_m_get(indexObject *self, PyObject *args)
1123 {
1124 {
1124 Py_ssize_t nodelen;
1125 Py_ssize_t nodelen;
1125 PyObject *val;
1126 PyObject *val;
1126 char *node;
1127 char *node;
1127 int rev;
1128 int rev;
1128
1129
1129 if (!PyArg_ParseTuple(args, "O", &val))
1130 if (!PyArg_ParseTuple(args, "O", &val))
1130 return NULL;
1131 return NULL;
1131 if (node_check(val, &node, &nodelen) == -1)
1132 if (node_check(val, &node, &nodelen) == -1)
1132 return NULL;
1133 return NULL;
1133 rev = index_find_node(self, node, nodelen);
1134 rev = index_find_node(self, node, nodelen);
1134 if (rev == -3)
1135 if (rev == -3)
1135 return NULL;
1136 return NULL;
1136 if (rev == -2)
1137 if (rev == -2)
1137 Py_RETURN_NONE;
1138 Py_RETURN_NONE;
1138 return PyInt_FromLong(rev);
1139 return PyInt_FromLong(rev);
1139 }
1140 }
1140
1141
1141 static int index_contains(indexObject *self, PyObject *value)
1142 static int index_contains(indexObject *self, PyObject *value)
1142 {
1143 {
1143 char *node;
1144 char *node;
1144 Py_ssize_t nodelen;
1145 Py_ssize_t nodelen;
1145
1146
1146 if (PyInt_Check(value)) {
1147 if (PyInt_Check(value)) {
1147 long rev = PyInt_AS_LONG(value);
1148 long rev = PyInt_AS_LONG(value);
1148 return rev >= -1 && rev < index_length(self);
1149 return rev >= -1 && rev < index_length(self);
1149 }
1150 }
1150
1151
1151 if (node_check(value, &node, &nodelen) == -1)
1152 if (node_check(value, &node, &nodelen) == -1)
1152 return -1;
1153 return -1;
1153
1154
1154 switch (index_find_node(self, node, nodelen)) {
1155 switch (index_find_node(self, node, nodelen)) {
1155 case -3:
1156 case -3:
1156 return -1;
1157 return -1;
1157 case -2:
1158 case -2:
1158 return 0;
1159 return 0;
1159 default:
1160 default:
1160 return 1;
1161 return 1;
1161 }
1162 }
1162 }
1163 }
1163
1164
1164 /*
1165 /*
1165 * Invalidate any trie entries introduced by added revs.
1166 * Invalidate any trie entries introduced by added revs.
1166 */
1167 */
1167 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1168 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1168 {
1169 {
1169 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1170 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1170
1171
1171 for (i = start; i < len; i++) {
1172 for (i = start; i < len; i++) {
1172 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1173 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1173 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1174 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1174
1175
1175 nt_insert(self, PyString_AS_STRING(node), -1);
1176 nt_insert(self, PyString_AS_STRING(node), -1);
1176 }
1177 }
1177
1178
1178 if (start == 0)
1179 if (start == 0)
1179 Py_CLEAR(self->added);
1180 Py_CLEAR(self->added);
1180 }
1181 }
1181
1182
1182 /*
1183 /*
1183 * Delete a numeric range of revs, which must be at the end of the
1184 * Delete a numeric range of revs, which must be at the end of the
1184 * range, but exclude the sentinel nullid entry.
1185 * range, but exclude the sentinel nullid entry.
1185 */
1186 */
1186 static int index_slice_del(indexObject *self, PyObject *item)
1187 static int index_slice_del(indexObject *self, PyObject *item)
1187 {
1188 {
1188 Py_ssize_t start, stop, step, slicelength;
1189 Py_ssize_t start, stop, step, slicelength;
1189 Py_ssize_t length = index_length(self);
1190 Py_ssize_t length = index_length(self);
1190 int ret = 0;
1191 int ret = 0;
1191
1192
1192 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1193 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1193 &start, &stop, &step, &slicelength) < 0)
1194 &start, &stop, &step, &slicelength) < 0)
1194 return -1;
1195 return -1;
1195
1196
1196 if (slicelength <= 0)
1197 if (slicelength <= 0)
1197 return 0;
1198 return 0;
1198
1199
1199 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1200 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1200 stop = start;
1201 stop = start;
1201
1202
1202 if (step < 0) {
1203 if (step < 0) {
1203 stop = start + 1;
1204 stop = start + 1;
1204 start = stop + step*(slicelength - 1) - 1;
1205 start = stop + step*(slicelength - 1) - 1;
1205 step = -step;
1206 step = -step;
1206 }
1207 }
1207
1208
1208 if (step != 1) {
1209 if (step != 1) {
1209 PyErr_SetString(PyExc_ValueError,
1210 PyErr_SetString(PyExc_ValueError,
1210 "revlog index delete requires step size of 1");
1211 "revlog index delete requires step size of 1");
1211 return -1;
1212 return -1;
1212 }
1213 }
1213
1214
1214 if (stop != length - 1) {
1215 if (stop != length - 1) {
1215 PyErr_SetString(PyExc_IndexError,
1216 PyErr_SetString(PyExc_IndexError,
1216 "revlog index deletion indices are invalid");
1217 "revlog index deletion indices are invalid");
1217 return -1;
1218 return -1;
1218 }
1219 }
1219
1220
1220 if (start < self->length - 1) {
1221 if (start < self->length - 1) {
1221 if (self->nt) {
1222 if (self->nt) {
1222 Py_ssize_t i;
1223 Py_ssize_t i;
1223
1224
1224 for (i = start + 1; i < self->length - 1; i++) {
1225 for (i = start + 1; i < self->length - 1; i++) {
1225 const char *node = index_node(self, i);
1226 const char *node = index_node(self, i);
1226
1227
1227 if (node)
1228 if (node)
1228 nt_insert(self, node, -1);
1229 nt_insert(self, node, -1);
1229 }
1230 }
1230 if (self->added)
1231 if (self->added)
1231 nt_invalidate_added(self, 0);
1232 nt_invalidate_added(self, 0);
1232 if (self->ntrev > start)
1233 if (self->ntrev > start)
1233 self->ntrev = (int)start;
1234 self->ntrev = (int)start;
1234 }
1235 }
1235 self->length = start + 1;
1236 self->length = start + 1;
1236 if (start < self->raw_length)
1237 if (start < self->raw_length)
1237 self->raw_length = start;
1238 self->raw_length = start;
1238 goto done;
1239 goto done;
1239 }
1240 }
1240
1241
1241 if (self->nt) {
1242 if (self->nt) {
1242 nt_invalidate_added(self, start - self->length + 1);
1243 nt_invalidate_added(self, start - self->length + 1);
1243 if (self->ntrev > start)
1244 if (self->ntrev > start)
1244 self->ntrev = (int)start;
1245 self->ntrev = (int)start;
1245 }
1246 }
1246 if (self->added)
1247 if (self->added)
1247 ret = PyList_SetSlice(self->added, start - self->length + 1,
1248 ret = PyList_SetSlice(self->added, start - self->length + 1,
1248 PyList_GET_SIZE(self->added), NULL);
1249 PyList_GET_SIZE(self->added), NULL);
1249 done:
1250 done:
1250 Py_CLEAR(self->headrevs);
1251 Py_CLEAR(self->headrevs);
1251 return ret;
1252 return ret;
1252 }
1253 }
1253
1254
1254 /*
1255 /*
1255 * Supported ops:
1256 * Supported ops:
1256 *
1257 *
1257 * slice deletion
1258 * slice deletion
1258 * string assignment (extend node->rev mapping)
1259 * string assignment (extend node->rev mapping)
1259 * string deletion (shrink node->rev mapping)
1260 * string deletion (shrink node->rev mapping)
1260 */
1261 */
1261 static int index_assign_subscript(indexObject *self, PyObject *item,
1262 static int index_assign_subscript(indexObject *self, PyObject *item,
1262 PyObject *value)
1263 PyObject *value)
1263 {
1264 {
1264 char *node;
1265 char *node;
1265 Py_ssize_t nodelen;
1266 Py_ssize_t nodelen;
1266 long rev;
1267 long rev;
1267
1268
1268 if (PySlice_Check(item) && value == NULL)
1269 if (PySlice_Check(item) && value == NULL)
1269 return index_slice_del(self, item);
1270 return index_slice_del(self, item);
1270
1271
1271 if (node_check(item, &node, &nodelen) == -1)
1272 if (node_check(item, &node, &nodelen) == -1)
1272 return -1;
1273 return -1;
1273
1274
1274 if (value == NULL)
1275 if (value == NULL)
1275 return self->nt ? nt_insert(self, node, -1) : 0;
1276 return self->nt ? nt_insert(self, node, -1) : 0;
1276 rev = PyInt_AsLong(value);
1277 rev = PyInt_AsLong(value);
1277 if (rev > INT_MAX || rev < 0) {
1278 if (rev > INT_MAX || rev < 0) {
1278 if (!PyErr_Occurred())
1279 if (!PyErr_Occurred())
1279 PyErr_SetString(PyExc_ValueError, "rev out of range");
1280 PyErr_SetString(PyExc_ValueError, "rev out of range");
1280 return -1;
1281 return -1;
1281 }
1282 }
1282 return nt_insert(self, node, (int)rev);
1283 return nt_insert(self, node, (int)rev);
1283 }
1284 }
1284
1285
1285 /*
1286 /*
1286 * Find all RevlogNG entries in an index that has inline data. Update
1287 * Find all RevlogNG entries in an index that has inline data. Update
1287 * the optional "offsets" table with those entries.
1288 * the optional "offsets" table with those entries.
1288 */
1289 */
1289 static long inline_scan(indexObject *self, const char **offsets)
1290 static long inline_scan(indexObject *self, const char **offsets)
1290 {
1291 {
1291 const char *data = PyString_AS_STRING(self->data);
1292 const char *data = PyString_AS_STRING(self->data);
1292 const char *end = data + PyString_GET_SIZE(self->data);
1293 const char *end = data + PyString_GET_SIZE(self->data);
1293 long incr = v1_hdrsize;
1294 long incr = v1_hdrsize;
1294 Py_ssize_t len = 0;
1295 Py_ssize_t len = 0;
1295
1296
1296 while (data + v1_hdrsize <= end) {
1297 while (data + v1_hdrsize <= end) {
1297 uint32_t comp_len;
1298 uint32_t comp_len;
1298 const char *old_data;
1299 const char *old_data;
1299 /* 3rd element of header is length of compressed inline data */
1300 /* 3rd element of header is length of compressed inline data */
1300 comp_len = getbe32(data + 8);
1301 comp_len = getbe32(data + 8);
1301 incr = v1_hdrsize + comp_len;
1302 incr = v1_hdrsize + comp_len;
1302 if (incr < v1_hdrsize)
1303 if (incr < v1_hdrsize)
1303 break;
1304 break;
1304 if (offsets)
1305 if (offsets)
1305 offsets[len] = data;
1306 offsets[len] = data;
1306 len++;
1307 len++;
1307 old_data = data;
1308 old_data = data;
1308 data += incr;
1309 data += incr;
1309 if (data <= old_data)
1310 if (data <= old_data)
1310 break;
1311 break;
1311 }
1312 }
1312
1313
1313 if (data != end && data + v1_hdrsize != end) {
1314 if (data != end && data + v1_hdrsize != end) {
1314 if (!PyErr_Occurred())
1315 if (!PyErr_Occurred())
1315 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1316 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1316 return -1;
1317 return -1;
1317 }
1318 }
1318
1319
1319 return len;
1320 return len;
1320 }
1321 }
1321
1322
1322 static int index_init(indexObject *self, PyObject *args)
1323 static int index_init(indexObject *self, PyObject *args)
1323 {
1324 {
1324 PyObject *data_obj, *inlined_obj;
1325 PyObject *data_obj, *inlined_obj;
1325 Py_ssize_t size;
1326 Py_ssize_t size;
1326
1327
1327 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1328 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1328 return -1;
1329 return -1;
1329 if (!PyString_Check(data_obj)) {
1330 if (!PyString_Check(data_obj)) {
1330 PyErr_SetString(PyExc_TypeError, "data is not a string");
1331 PyErr_SetString(PyExc_TypeError, "data is not a string");
1331 return -1;
1332 return -1;
1332 }
1333 }
1333 size = PyString_GET_SIZE(data_obj);
1334 size = PyString_GET_SIZE(data_obj);
1334
1335
1335 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1336 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1336 self->data = data_obj;
1337 self->data = data_obj;
1337 self->cache = NULL;
1338 self->cache = NULL;
1338
1339
1339 self->added = NULL;
1340 self->added = NULL;
1340 self->headrevs = NULL;
1341 self->headrevs = NULL;
1341 self->offsets = NULL;
1342 self->offsets = NULL;
1342 self->nt = NULL;
1343 self->nt = NULL;
1343 self->ntlength = self->ntcapacity = 0;
1344 self->ntlength = self->ntcapacity = 0;
1344 self->ntdepth = self->ntsplits = 0;
1345 self->ntdepth = self->ntsplits = 0;
1345 self->ntlookups = self->ntmisses = 0;
1346 self->ntlookups = self->ntmisses = 0;
1346 self->ntrev = -1;
1347 self->ntrev = -1;
1347 Py_INCREF(self->data);
1348 Py_INCREF(self->data);
1348
1349
1349 if (self->inlined) {
1350 if (self->inlined) {
1350 long len = inline_scan(self, NULL);
1351 long len = inline_scan(self, NULL);
1351 if (len == -1)
1352 if (len == -1)
1352 goto bail;
1353 goto bail;
1353 self->raw_length = len;
1354 self->raw_length = len;
1354 self->length = len + 1;
1355 self->length = len + 1;
1355 } else {
1356 } else {
1356 if (size % v1_hdrsize) {
1357 if (size % v1_hdrsize) {
1357 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1358 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1358 goto bail;
1359 goto bail;
1359 }
1360 }
1360 self->raw_length = size / v1_hdrsize;
1361 self->raw_length = size / v1_hdrsize;
1361 self->length = self->raw_length + 1;
1362 self->length = self->raw_length + 1;
1362 }
1363 }
1363
1364
1364 return 0;
1365 return 0;
1365 bail:
1366 bail:
1366 return -1;
1367 return -1;
1367 }
1368 }
1368
1369
1369 static PyObject *index_nodemap(indexObject *self)
1370 static PyObject *index_nodemap(indexObject *self)
1370 {
1371 {
1371 Py_INCREF(self);
1372 Py_INCREF(self);
1372 return (PyObject *)self;
1373 return (PyObject *)self;
1373 }
1374 }
1374
1375
1375 static void index_dealloc(indexObject *self)
1376 static void index_dealloc(indexObject *self)
1376 {
1377 {
1377 _index_clearcaches(self);
1378 _index_clearcaches(self);
1378 Py_DECREF(self->data);
1379 Py_DECREF(self->data);
1379 Py_XDECREF(self->added);
1380 Py_XDECREF(self->added);
1380 PyObject_Del(self);
1381 PyObject_Del(self);
1381 }
1382 }
1382
1383
1383 static PySequenceMethods index_sequence_methods = {
1384 static PySequenceMethods index_sequence_methods = {
1384 (lenfunc)index_length, /* sq_length */
1385 (lenfunc)index_length, /* sq_length */
1385 0, /* sq_concat */
1386 0, /* sq_concat */
1386 0, /* sq_repeat */
1387 0, /* sq_repeat */
1387 (ssizeargfunc)index_get, /* sq_item */
1388 (ssizeargfunc)index_get, /* sq_item */
1388 0, /* sq_slice */
1389 0, /* sq_slice */
1389 0, /* sq_ass_item */
1390 0, /* sq_ass_item */
1390 0, /* sq_ass_slice */
1391 0, /* sq_ass_slice */
1391 (objobjproc)index_contains, /* sq_contains */
1392 (objobjproc)index_contains, /* sq_contains */
1392 };
1393 };
1393
1394
1394 static PyMappingMethods index_mapping_methods = {
1395 static PyMappingMethods index_mapping_methods = {
1395 (lenfunc)index_length, /* mp_length */
1396 (lenfunc)index_length, /* mp_length */
1396 (binaryfunc)index_getitem, /* mp_subscript */
1397 (binaryfunc)index_getitem, /* mp_subscript */
1397 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1398 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1398 };
1399 };
1399
1400
1400 static PyMethodDef index_methods[] = {
1401 static PyMethodDef index_methods[] = {
1401 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1402 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1402 "clear the index caches"},
1403 "clear the index caches"},
1403 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1404 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1404 "get an index entry"},
1405 "get an index entry"},
1405 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1406 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1406 "get head revisions"},
1407 "get head revisions"},
1407 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1408 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1408 "insert an index entry"},
1409 "insert an index entry"},
1409 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1410 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1410 "match a potentially ambiguous node ID"},
1411 "match a potentially ambiguous node ID"},
1411 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1412 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1412 "stats for the index"},
1413 "stats for the index"},
1413 {NULL} /* Sentinel */
1414 {NULL} /* Sentinel */
1414 };
1415 };
1415
1416
1416 static PyGetSetDef index_getset[] = {
1417 static PyGetSetDef index_getset[] = {
1417 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1418 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1418 {NULL} /* Sentinel */
1419 {NULL} /* Sentinel */
1419 };
1420 };
1420
1421
1421 static PyTypeObject indexType = {
1422 static PyTypeObject indexType = {
1422 PyObject_HEAD_INIT(NULL)
1423 PyObject_HEAD_INIT(NULL)
1423 0, /* ob_size */
1424 0, /* ob_size */
1424 "parsers.index", /* tp_name */
1425 "parsers.index", /* tp_name */
1425 sizeof(indexObject), /* tp_basicsize */
1426 sizeof(indexObject), /* tp_basicsize */
1426 0, /* tp_itemsize */
1427 0, /* tp_itemsize */
1427 (destructor)index_dealloc, /* tp_dealloc */
1428 (destructor)index_dealloc, /* tp_dealloc */
1428 0, /* tp_print */
1429 0, /* tp_print */
1429 0, /* tp_getattr */
1430 0, /* tp_getattr */
1430 0, /* tp_setattr */
1431 0, /* tp_setattr */
1431 0, /* tp_compare */
1432 0, /* tp_compare */
1432 0, /* tp_repr */
1433 0, /* tp_repr */
1433 0, /* tp_as_number */
1434 0, /* tp_as_number */
1434 &index_sequence_methods, /* tp_as_sequence */
1435 &index_sequence_methods, /* tp_as_sequence */
1435 &index_mapping_methods, /* tp_as_mapping */
1436 &index_mapping_methods, /* tp_as_mapping */
1436 0, /* tp_hash */
1437 0, /* tp_hash */
1437 0, /* tp_call */
1438 0, /* tp_call */
1438 0, /* tp_str */
1439 0, /* tp_str */
1439 0, /* tp_getattro */
1440 0, /* tp_getattro */
1440 0, /* tp_setattro */
1441 0, /* tp_setattro */
1441 0, /* tp_as_buffer */
1442 0, /* tp_as_buffer */
1442 Py_TPFLAGS_DEFAULT, /* tp_flags */
1443 Py_TPFLAGS_DEFAULT, /* tp_flags */
1443 "revlog index", /* tp_doc */
1444 "revlog index", /* tp_doc */
1444 0, /* tp_traverse */
1445 0, /* tp_traverse */
1445 0, /* tp_clear */
1446 0, /* tp_clear */
1446 0, /* tp_richcompare */
1447 0, /* tp_richcompare */
1447 0, /* tp_weaklistoffset */
1448 0, /* tp_weaklistoffset */
1448 0, /* tp_iter */
1449 0, /* tp_iter */
1449 0, /* tp_iternext */
1450 0, /* tp_iternext */
1450 index_methods, /* tp_methods */
1451 index_methods, /* tp_methods */
1451 0, /* tp_members */
1452 0, /* tp_members */
1452 index_getset, /* tp_getset */
1453 index_getset, /* tp_getset */
1453 0, /* tp_base */
1454 0, /* tp_base */
1454 0, /* tp_dict */
1455 0, /* tp_dict */
1455 0, /* tp_descr_get */
1456 0, /* tp_descr_get */
1456 0, /* tp_descr_set */
1457 0, /* tp_descr_set */
1457 0, /* tp_dictoffset */
1458 0, /* tp_dictoffset */
1458 (initproc)index_init, /* tp_init */
1459 (initproc)index_init, /* tp_init */
1459 0, /* tp_alloc */
1460 0, /* tp_alloc */
1460 };
1461 };
1461
1462
1462 /*
1463 /*
1463 * returns a tuple of the form (index, index, cache) with elements as
1464 * returns a tuple of the form (index, index, cache) with elements as
1464 * follows:
1465 * follows:
1465 *
1466 *
1466 * index: an index object that lazily parses RevlogNG records
1467 * index: an index object that lazily parses RevlogNG records
1467 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1468 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1468 *
1469 *
1469 * added complications are for backwards compatibility
1470 * added complications are for backwards compatibility
1470 */
1471 */
1471 static PyObject *parse_index2(PyObject *self, PyObject *args)
1472 static PyObject *parse_index2(PyObject *self, PyObject *args)
1472 {
1473 {
1473 PyObject *tuple = NULL, *cache = NULL;
1474 PyObject *tuple = NULL, *cache = NULL;
1474 indexObject *idx;
1475 indexObject *idx;
1475 int ret;
1476 int ret;
1476
1477
1477 idx = PyObject_New(indexObject, &indexType);
1478 idx = PyObject_New(indexObject, &indexType);
1478 if (idx == NULL)
1479 if (idx == NULL)
1479 goto bail;
1480 goto bail;
1480
1481
1481 ret = index_init(idx, args);
1482 ret = index_init(idx, args);
1482 if (ret == -1)
1483 if (ret == -1)
1483 goto bail;
1484 goto bail;
1484
1485
1485 if (idx->inlined) {
1486 if (idx->inlined) {
1486 cache = Py_BuildValue("iO", 0, idx->data);
1487 cache = Py_BuildValue("iO", 0, idx->data);
1487 if (cache == NULL)
1488 if (cache == NULL)
1488 goto bail;
1489 goto bail;
1489 } else {
1490 } else {
1490 cache = Py_None;
1491 cache = Py_None;
1491 Py_INCREF(cache);
1492 Py_INCREF(cache);
1492 }
1493 }
1493
1494
1494 tuple = Py_BuildValue("NN", idx, cache);
1495 tuple = Py_BuildValue("NN", idx, cache);
1495 if (!tuple)
1496 if (!tuple)
1496 goto bail;
1497 goto bail;
1497 return tuple;
1498 return tuple;
1498
1499
1499 bail:
1500 bail:
1500 Py_XDECREF(idx);
1501 Py_XDECREF(idx);
1501 Py_XDECREF(cache);
1502 Py_XDECREF(cache);
1502 Py_XDECREF(tuple);
1503 Py_XDECREF(tuple);
1503 return NULL;
1504 return NULL;
1504 }
1505 }
1505
1506
1506 static char parsers_doc[] = "Efficient content parsing.";
1507 static char parsers_doc[] = "Efficient content parsing.";
1507
1508
1508 static PyMethodDef methods[] = {
1509 static PyMethodDef methods[] = {
1509 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1510 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1510 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1511 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1511 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1512 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1512 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1513 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1513 {NULL, NULL}
1514 {NULL, NULL}
1514 };
1515 };
1515
1516
1516 static void module_init(PyObject *mod)
1517 static void module_init(PyObject *mod)
1517 {
1518 {
1518 indexType.tp_new = PyType_GenericNew;
1519 indexType.tp_new = PyType_GenericNew;
1519 if (PyType_Ready(&indexType) < 0)
1520 if (PyType_Ready(&indexType) < 0)
1520 return;
1521 return;
1521 Py_INCREF(&indexType);
1522 Py_INCREF(&indexType);
1522
1523
1523 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1524 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1524
1525
1525 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1526 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1526 -1, -1, -1, -1, nullid, 20);
1527 -1, -1, -1, -1, nullid, 20);
1527 if (nullentry)
1528 if (nullentry)
1528 PyObject_GC_UnTrack(nullentry);
1529 PyObject_GC_UnTrack(nullentry);
1529
1530
1530 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1531 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1531 }
1532 }
1532
1533
1533 #ifdef IS_PY3K
1534 #ifdef IS_PY3K
1534 static struct PyModuleDef parsers_module = {
1535 static struct PyModuleDef parsers_module = {
1535 PyModuleDef_HEAD_INIT,
1536 PyModuleDef_HEAD_INIT,
1536 "parsers",
1537 "parsers",
1537 parsers_doc,
1538 parsers_doc,
1538 -1,
1539 -1,
1539 methods
1540 methods
1540 };
1541 };
1541
1542
1542 PyMODINIT_FUNC PyInit_parsers(void)
1543 PyMODINIT_FUNC PyInit_parsers(void)
1543 {
1544 {
1544 PyObject *mod = PyModule_Create(&parsers_module);
1545 PyObject *mod = PyModule_Create(&parsers_module);
1545 module_init(mod);
1546 module_init(mod);
1546 return mod;
1547 return mod;
1547 }
1548 }
1548 #else
1549 #else
1549 PyMODINIT_FUNC initparsers(void)
1550 PyMODINIT_FUNC initparsers(void)
1550 {
1551 {
1551 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1552 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1552 module_init(mod);
1553 module_init(mod);
1553 }
1554 }
1554 #endif
1555 #endif
General Comments 0
You need to be logged in to leave comments. Login now