##// END OF EJS Templates
parsers: ensure that nullid is always present in the radix tree
Bryan O'Sullivan -
r16664:5bc6edf7 default
parent child Browse files
Show More
@@ -1,1218 +1,1220
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
16 static inline int hexdigit(const char *p, Py_ssize_t off)
16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 {
17 {
18 char c = p[off];
18 char c = p[off];
19
19
20 if (c >= '0' && c <= '9')
20 if (c >= '0' && c <= '9')
21 return c - '0';
21 return c - '0';
22 if (c >= 'a' && c <= 'f')
22 if (c >= 'a' && c <= 'f')
23 return c - 'a' + 10;
23 return c - 'a' + 10;
24 if (c >= 'A' && c <= 'F')
24 if (c >= 'A' && c <= 'F')
25 return c - 'A' + 10;
25 return c - 'A' + 10;
26
26
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 return 0;
28 return 0;
29 }
29 }
30
30
31 /*
31 /*
32 * Turn a hex-encoded string into binary.
32 * Turn a hex-encoded string into binary.
33 */
33 */
34 static PyObject *unhexlify(const char *str, int len)
34 static PyObject *unhexlify(const char *str, int len)
35 {
35 {
36 PyObject *ret;
36 PyObject *ret;
37 char *d;
37 char *d;
38 int i;
38 int i;
39
39
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41
41
42 if (!ret)
42 if (!ret)
43 return NULL;
43 return NULL;
44
44
45 d = PyBytes_AsString(ret);
45 d = PyBytes_AsString(ret);
46
46
47 for (i = 0; i < len;) {
47 for (i = 0; i < len;) {
48 int hi = hexdigit(str, i++);
48 int hi = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
50 *d++ = (hi << 4) | lo;
50 *d++ = (hi << 4) | lo;
51 }
51 }
52
52
53 return ret;
53 return ret;
54 }
54 }
55
55
56 /*
56 /*
57 * This code assumes that a manifest is stitched together with newline
57 * This code assumes that a manifest is stitched together with newline
58 * ('\n') characters.
58 * ('\n') characters.
59 */
59 */
60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 {
61 {
62 PyObject *mfdict, *fdict;
62 PyObject *mfdict, *fdict;
63 char *str, *cur, *start, *zero;
63 char *str, *cur, *start, *zero;
64 int len;
64 int len;
65
65
66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 &PyDict_Type, &mfdict,
67 &PyDict_Type, &mfdict,
68 &PyDict_Type, &fdict,
68 &PyDict_Type, &fdict,
69 &str, &len))
69 &str, &len))
70 goto quit;
70 goto quit;
71
71
72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 PyObject *file = NULL, *node = NULL;
73 PyObject *file = NULL, *node = NULL;
74 PyObject *flags = NULL;
74 PyObject *flags = NULL;
75 int nlen;
75 int nlen;
76
76
77 if (!*cur) {
77 if (!*cur) {
78 zero = cur;
78 zero = cur;
79 continue;
79 continue;
80 }
80 }
81 else if (*cur != '\n')
81 else if (*cur != '\n')
82 continue;
82 continue;
83
83
84 if (!zero) {
84 if (!zero) {
85 PyErr_SetString(PyExc_ValueError,
85 PyErr_SetString(PyExc_ValueError,
86 "manifest entry has no separator");
86 "manifest entry has no separator");
87 goto quit;
87 goto quit;
88 }
88 }
89
89
90 file = PyBytes_FromStringAndSize(start, zero - start);
90 file = PyBytes_FromStringAndSize(start, zero - start);
91
91
92 if (!file)
92 if (!file)
93 goto bail;
93 goto bail;
94
94
95 nlen = cur - zero - 1;
95 nlen = cur - zero - 1;
96
96
97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 if (!node)
98 if (!node)
99 goto bail;
99 goto bail;
100
100
101 if (nlen > 40) {
101 if (nlen > 40) {
102 flags = PyBytes_FromStringAndSize(zero + 41,
102 flags = PyBytes_FromStringAndSize(zero + 41,
103 nlen - 40);
103 nlen - 40);
104 if (!flags)
104 if (!flags)
105 goto bail;
105 goto bail;
106
106
107 if (PyDict_SetItem(fdict, file, flags) == -1)
107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 goto bail;
108 goto bail;
109 }
109 }
110
110
111 if (PyDict_SetItem(mfdict, file, node) == -1)
111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 goto bail;
112 goto bail;
113
113
114 start = cur + 1;
114 start = cur + 1;
115 zero = NULL;
115 zero = NULL;
116
116
117 Py_XDECREF(flags);
117 Py_XDECREF(flags);
118 Py_XDECREF(node);
118 Py_XDECREF(node);
119 Py_XDECREF(file);
119 Py_XDECREF(file);
120 continue;
120 continue;
121 bail:
121 bail:
122 Py_XDECREF(flags);
122 Py_XDECREF(flags);
123 Py_XDECREF(node);
123 Py_XDECREF(node);
124 Py_XDECREF(file);
124 Py_XDECREF(file);
125 goto quit;
125 goto quit;
126 }
126 }
127
127
128 if (len > 0 && *(cur - 1) != '\n') {
128 if (len > 0 && *(cur - 1) != '\n') {
129 PyErr_SetString(PyExc_ValueError,
129 PyErr_SetString(PyExc_ValueError,
130 "manifest contains trailing garbage");
130 "manifest contains trailing garbage");
131 goto quit;
131 goto quit;
132 }
132 }
133
133
134 Py_INCREF(Py_None);
134 Py_INCREF(Py_None);
135 return Py_None;
135 return Py_None;
136 quit:
136 quit:
137 return NULL;
137 return NULL;
138 }
138 }
139
139
140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 {
141 {
142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 char *str, *cur, *end, *cpos;
144 char *str, *cur, *end, *cpos;
145 int state, mode, size, mtime;
145 int state, mode, size, mtime;
146 unsigned int flen;
146 unsigned int flen;
147 int len;
147 int len;
148
148
149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 &PyDict_Type, &dmap,
150 &PyDict_Type, &dmap,
151 &PyDict_Type, &cmap,
151 &PyDict_Type, &cmap,
152 &str, &len))
152 &str, &len))
153 goto quit;
153 goto quit;
154
154
155 /* read parents */
155 /* read parents */
156 if (len < 40)
156 if (len < 40)
157 goto quit;
157 goto quit;
158
158
159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 if (!parents)
160 if (!parents)
161 goto quit;
161 goto quit;
162
162
163 /* read filenames */
163 /* read filenames */
164 cur = str + 40;
164 cur = str + 40;
165 end = str + len;
165 end = str + len;
166
166
167 while (cur < end - 17) {
167 while (cur < end - 17) {
168 /* unpack header */
168 /* unpack header */
169 state = *cur;
169 state = *cur;
170 mode = getbe32(cur + 1);
170 mode = getbe32(cur + 1);
171 size = getbe32(cur + 5);
171 size = getbe32(cur + 5);
172 mtime = getbe32(cur + 9);
172 mtime = getbe32(cur + 9);
173 flen = getbe32(cur + 13);
173 flen = getbe32(cur + 13);
174 cur += 17;
174 cur += 17;
175 if (cur + flen > end || cur + flen < cur) {
175 if (cur + flen > end || cur + flen < cur) {
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 goto quit;
177 goto quit;
178 }
178 }
179
179
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 if (!entry)
181 if (!entry)
182 goto quit;
182 goto quit;
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184
184
185 cpos = memchr(cur, 0, flen);
185 cpos = memchr(cur, 0, flen);
186 if (cpos) {
186 if (cpos) {
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 cname = PyBytes_FromStringAndSize(cpos + 1,
188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 flen - (cpos - cur) - 1);
189 flen - (cpos - cur) - 1);
190 if (!fname || !cname ||
190 if (!fname || !cname ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 PyDict_SetItem(dmap, fname, entry) == -1)
192 PyDict_SetItem(dmap, fname, entry) == -1)
193 goto quit;
193 goto quit;
194 Py_DECREF(cname);
194 Py_DECREF(cname);
195 } else {
195 } else {
196 fname = PyBytes_FromStringAndSize(cur, flen);
196 fname = PyBytes_FromStringAndSize(cur, flen);
197 if (!fname ||
197 if (!fname ||
198 PyDict_SetItem(dmap, fname, entry) == -1)
198 PyDict_SetItem(dmap, fname, entry) == -1)
199 goto quit;
199 goto quit;
200 }
200 }
201 cur += flen;
201 cur += flen;
202 Py_DECREF(fname);
202 Py_DECREF(fname);
203 Py_DECREF(entry);
203 Py_DECREF(entry);
204 fname = cname = entry = NULL;
204 fname = cname = entry = NULL;
205 }
205 }
206
206
207 ret = parents;
207 ret = parents;
208 Py_INCREF(ret);
208 Py_INCREF(ret);
209 quit:
209 quit:
210 Py_XDECREF(fname);
210 Py_XDECREF(fname);
211 Py_XDECREF(cname);
211 Py_XDECREF(cname);
212 Py_XDECREF(entry);
212 Py_XDECREF(entry);
213 Py_XDECREF(parents);
213 Py_XDECREF(parents);
214 return ret;
214 return ret;
215 }
215 }
216
216
217 /*
217 /*
218 * A base-16 trie for fast node->rev mapping.
218 * A base-16 trie for fast node->rev mapping.
219 *
219 *
220 * Positive value is index of the next node in the trie
220 * Positive value is index of the next node in the trie
221 * Negative value is a leaf: -(rev + 1)
221 * Negative value is a leaf: -(rev + 1)
222 * Zero is empty
222 * Zero is empty
223 */
223 */
224 typedef struct {
224 typedef struct {
225 int children[16];
225 int children[16];
226 } nodetree;
226 } nodetree;
227
227
228 /*
228 /*
229 * This class has two behaviours.
229 * This class has two behaviours.
230 *
230 *
231 * When used in a list-like way (with integer keys), we decode an
231 * When used in a list-like way (with integer keys), we decode an
232 * entry in a RevlogNG index file on demand. Our last entry is a
232 * entry in a RevlogNG index file on demand. Our last entry is a
233 * sentinel, always a nullid. We have limited support for
233 * sentinel, always a nullid. We have limited support for
234 * integer-keyed insert and delete, only at elements right before the
234 * integer-keyed insert and delete, only at elements right before the
235 * sentinel.
235 * sentinel.
236 *
236 *
237 * With string keys, we lazily perform a reverse mapping from node to
237 * With string keys, we lazily perform a reverse mapping from node to
238 * rev, using a base-16 trie.
238 * rev, using a base-16 trie.
239 */
239 */
240 typedef struct {
240 typedef struct {
241 PyObject_HEAD
241 PyObject_HEAD
242 /* Type-specific fields go here. */
242 /* Type-specific fields go here. */
243 PyObject *data; /* raw bytes of index */
243 PyObject *data; /* raw bytes of index */
244 PyObject **cache; /* cached tuples */
244 PyObject **cache; /* cached tuples */
245 const char **offsets; /* populated on demand */
245 const char **offsets; /* populated on demand */
246 Py_ssize_t raw_length; /* original number of elements */
246 Py_ssize_t raw_length; /* original number of elements */
247 Py_ssize_t length; /* current number of elements */
247 Py_ssize_t length; /* current number of elements */
248 PyObject *added; /* populated on demand */
248 PyObject *added; /* populated on demand */
249 nodetree *nt; /* base-16 trie */
249 nodetree *nt; /* base-16 trie */
250 int ntlength; /* # nodes in use */
250 int ntlength; /* # nodes in use */
251 int ntcapacity; /* # nodes allocated */
251 int ntcapacity; /* # nodes allocated */
252 int ntdepth; /* maximum depth of tree */
252 int ntdepth; /* maximum depth of tree */
253 int ntsplits; /* # splits performed */
253 int ntsplits; /* # splits performed */
254 int ntrev; /* last rev scanned */
254 int ntrev; /* last rev scanned */
255 int ntlookups; /* # lookups */
255 int ntlookups; /* # lookups */
256 int ntmisses; /* # lookups that miss the cache */
256 int ntmisses; /* # lookups that miss the cache */
257 int inlined;
257 int inlined;
258 } indexObject;
258 } indexObject;
259
259
260 static Py_ssize_t index_length(const indexObject *self)
260 static Py_ssize_t index_length(const indexObject *self)
261 {
261 {
262 if (self->added == NULL)
262 if (self->added == NULL)
263 return self->length;
263 return self->length;
264 return self->length + PyList_GET_SIZE(self->added);
264 return self->length + PyList_GET_SIZE(self->added);
265 }
265 }
266
266
267 static PyObject *nullentry;
267 static PyObject *nullentry;
268 static const char nullid[20];
268 static const char nullid[20];
269
269
270 static long inline_scan(indexObject *self, const char **offsets);
270 static long inline_scan(indexObject *self, const char **offsets);
271
271
272 #if LONG_MAX == 0x7fffffffL
272 #if LONG_MAX == 0x7fffffffL
273 static char *tuple_format = "Kiiiiiis#";
273 static char *tuple_format = "Kiiiiiis#";
274 #else
274 #else
275 static char *tuple_format = "kiiiiiis#";
275 static char *tuple_format = "kiiiiiis#";
276 #endif
276 #endif
277
277
278 /*
278 /*
279 * Return a pointer to the beginning of a RevlogNG record.
279 * Return a pointer to the beginning of a RevlogNG record.
280 */
280 */
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 {
282 {
283 if (self->inlined && pos > 0) {
283 if (self->inlined && pos > 0) {
284 if (self->offsets == NULL) {
284 if (self->offsets == NULL) {
285 self->offsets = malloc(self->raw_length *
285 self->offsets = malloc(self->raw_length *
286 sizeof(*self->offsets));
286 sizeof(*self->offsets));
287 if (self->offsets == NULL)
287 if (self->offsets == NULL)
288 return (const char *)PyErr_NoMemory();
288 return (const char *)PyErr_NoMemory();
289 inline_scan(self, self->offsets);
289 inline_scan(self, self->offsets);
290 }
290 }
291 return self->offsets[pos];
291 return self->offsets[pos];
292 }
292 }
293
293
294 return PyString_AS_STRING(self->data) + pos * 64;
294 return PyString_AS_STRING(self->data) + pos * 64;
295 }
295 }
296
296
297 /*
297 /*
298 * RevlogNG format (all in big endian, data may be inlined):
298 * RevlogNG format (all in big endian, data may be inlined):
299 * 6 bytes: offset
299 * 6 bytes: offset
300 * 2 bytes: flags
300 * 2 bytes: flags
301 * 4 bytes: compressed length
301 * 4 bytes: compressed length
302 * 4 bytes: uncompressed length
302 * 4 bytes: uncompressed length
303 * 4 bytes: base revision
303 * 4 bytes: base revision
304 * 4 bytes: link revision
304 * 4 bytes: link revision
305 * 4 bytes: parent 1 revision
305 * 4 bytes: parent 1 revision
306 * 4 bytes: parent 2 revision
306 * 4 bytes: parent 2 revision
307 * 32 bytes: nodeid (only 20 bytes used)
307 * 32 bytes: nodeid (only 20 bytes used)
308 */
308 */
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 {
310 {
311 uint64_t offset_flags;
311 uint64_t offset_flags;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 const char *c_node_id;
313 const char *c_node_id;
314 const char *data;
314 const char *data;
315 Py_ssize_t length = index_length(self);
315 Py_ssize_t length = index_length(self);
316 PyObject *entry;
316 PyObject *entry;
317
317
318 if (pos < 0)
318 if (pos < 0)
319 pos += length;
319 pos += length;
320
320
321 if (pos < 0 || pos >= length) {
321 if (pos < 0 || pos >= length) {
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
323 return NULL;
323 return NULL;
324 }
324 }
325
325
326 if (pos == length - 1) {
326 if (pos == length - 1) {
327 Py_INCREF(nullentry);
327 Py_INCREF(nullentry);
328 return nullentry;
328 return nullentry;
329 }
329 }
330
330
331 if (pos >= self->length - 1) {
331 if (pos >= self->length - 1) {
332 PyObject *obj;
332 PyObject *obj;
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
334 Py_INCREF(obj);
334 Py_INCREF(obj);
335 return obj;
335 return obj;
336 }
336 }
337
337
338 if (self->cache) {
338 if (self->cache) {
339 if (self->cache[pos]) {
339 if (self->cache[pos]) {
340 Py_INCREF(self->cache[pos]);
340 Py_INCREF(self->cache[pos]);
341 return self->cache[pos];
341 return self->cache[pos];
342 }
342 }
343 } else {
343 } else {
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
345 if (self->cache == NULL)
345 if (self->cache == NULL)
346 return PyErr_NoMemory();
346 return PyErr_NoMemory();
347 }
347 }
348
348
349 data = index_deref(self, pos);
349 data = index_deref(self, pos);
350 if (data == NULL)
350 if (data == NULL)
351 return NULL;
351 return NULL;
352
352
353 offset_flags = getbe32(data + 4);
353 offset_flags = getbe32(data + 4);
354 if (pos == 0) /* mask out version number for the first entry */
354 if (pos == 0) /* mask out version number for the first entry */
355 offset_flags &= 0xFFFF;
355 offset_flags &= 0xFFFF;
356 else {
356 else {
357 uint32_t offset_high = getbe32(data);
357 uint32_t offset_high = getbe32(data);
358 offset_flags |= ((uint64_t)offset_high) << 32;
358 offset_flags |= ((uint64_t)offset_high) << 32;
359 }
359 }
360
360
361 comp_len = getbe32(data + 8);
361 comp_len = getbe32(data + 8);
362 uncomp_len = getbe32(data + 12);
362 uncomp_len = getbe32(data + 12);
363 base_rev = getbe32(data + 16);
363 base_rev = getbe32(data + 16);
364 link_rev = getbe32(data + 20);
364 link_rev = getbe32(data + 20);
365 parent_1 = getbe32(data + 24);
365 parent_1 = getbe32(data + 24);
366 parent_2 = getbe32(data + 28);
366 parent_2 = getbe32(data + 28);
367 c_node_id = data + 32;
367 c_node_id = data + 32;
368
368
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
370 uncomp_len, base_rev, link_rev,
370 uncomp_len, base_rev, link_rev,
371 parent_1, parent_2, c_node_id, 20);
371 parent_1, parent_2, c_node_id, 20);
372
372
373 if (entry)
373 if (entry)
374 PyObject_GC_UnTrack(entry);
374 PyObject_GC_UnTrack(entry);
375
375
376 self->cache[pos] = entry;
376 self->cache[pos] = entry;
377 Py_INCREF(entry);
377 Py_INCREF(entry);
378
378
379 return entry;
379 return entry;
380 }
380 }
381
381
382 /*
382 /*
383 * Return the 20-byte SHA of the node corresponding to the given rev.
383 * Return the 20-byte SHA of the node corresponding to the given rev.
384 */
384 */
385 static const char *index_node(indexObject *self, Py_ssize_t pos)
385 static const char *index_node(indexObject *self, Py_ssize_t pos)
386 {
386 {
387 Py_ssize_t length = index_length(self);
387 Py_ssize_t length = index_length(self);
388 const char *data;
388 const char *data;
389
389
390 if (pos == length - 1)
390 if (pos == length - 1 || pos == INT_MAX)
391 return nullid;
391 return nullid;
392
392
393 if (pos >= length)
393 if (pos >= length)
394 return NULL;
394 return NULL;
395
395
396 if (pos >= self->length - 1) {
396 if (pos >= self->length - 1) {
397 PyObject *tuple, *str;
397 PyObject *tuple, *str;
398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
399 str = PyTuple_GetItem(tuple, 7);
399 str = PyTuple_GetItem(tuple, 7);
400 return str ? PyString_AS_STRING(str) : NULL;
400 return str ? PyString_AS_STRING(str) : NULL;
401 }
401 }
402
402
403 data = index_deref(self, pos);
403 data = index_deref(self, pos);
404 return data ? data + 32 : NULL;
404 return data ? data + 32 : NULL;
405 }
405 }
406
406
407 static int nt_insert(indexObject *self, const char *node, int rev);
407 static int nt_insert(indexObject *self, const char *node, int rev);
408
408
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
410 {
410 {
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
412 return -1;
412 return -1;
413 if (*nodelen == 20)
413 if (*nodelen == 20)
414 return 0;
414 return 0;
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
416 return -1;
416 return -1;
417 }
417 }
418
418
419 static PyObject *index_insert(indexObject *self, PyObject *args)
419 static PyObject *index_insert(indexObject *self, PyObject *args)
420 {
420 {
421 PyObject *obj;
421 PyObject *obj;
422 char *node;
422 char *node;
423 long offset;
423 long offset;
424 Py_ssize_t len, nodelen;
424 Py_ssize_t len, nodelen;
425
425
426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
427 return NULL;
427 return NULL;
428
428
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 return NULL;
431 return NULL;
432 }
432 }
433
433
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
435 return NULL;
435 return NULL;
436
436
437 len = index_length(self);
437 len = index_length(self);
438
438
439 if (offset < 0)
439 if (offset < 0)
440 offset += len;
440 offset += len;
441
441
442 if (offset != len - 1) {
442 if (offset != len - 1) {
443 PyErr_SetString(PyExc_IndexError,
443 PyErr_SetString(PyExc_IndexError,
444 "insert only supported at index -1");
444 "insert only supported at index -1");
445 return NULL;
445 return NULL;
446 }
446 }
447
447
448 if (offset > INT_MAX) {
448 if (offset > INT_MAX) {
449 PyErr_SetString(PyExc_ValueError,
449 PyErr_SetString(PyExc_ValueError,
450 "currently only 2**31 revs supported");
450 "currently only 2**31 revs supported");
451 return NULL;
451 return NULL;
452 }
452 }
453
453
454 if (self->added == NULL) {
454 if (self->added == NULL) {
455 self->added = PyList_New(0);
455 self->added = PyList_New(0);
456 if (self->added == NULL)
456 if (self->added == NULL)
457 return NULL;
457 return NULL;
458 }
458 }
459
459
460 if (PyList_Append(self->added, obj) == -1)
460 if (PyList_Append(self->added, obj) == -1)
461 return NULL;
461 return NULL;
462
462
463 if (self->nt)
463 if (self->nt)
464 nt_insert(self, node, (int)offset);
464 nt_insert(self, node, (int)offset);
465
465
466 Py_RETURN_NONE;
466 Py_RETURN_NONE;
467 }
467 }
468
468
469 static void _index_clearcaches(indexObject *self)
469 static void _index_clearcaches(indexObject *self)
470 {
470 {
471 if (self->cache) {
471 if (self->cache) {
472 Py_ssize_t i;
472 Py_ssize_t i;
473
473
474 for (i = 0; i < self->raw_length; i++) {
474 for (i = 0; i < self->raw_length; i++) {
475 if (self->cache[i]) {
475 if (self->cache[i]) {
476 Py_DECREF(self->cache[i]);
476 Py_DECREF(self->cache[i]);
477 self->cache[i] = NULL;
477 self->cache[i] = NULL;
478 }
478 }
479 }
479 }
480 free(self->cache);
480 free(self->cache);
481 self->cache = NULL;
481 self->cache = NULL;
482 }
482 }
483 if (self->offsets) {
483 if (self->offsets) {
484 free(self->offsets);
484 free(self->offsets);
485 self->offsets = NULL;
485 self->offsets = NULL;
486 }
486 }
487 if (self->nt) {
487 if (self->nt) {
488 free(self->nt);
488 free(self->nt);
489 self->nt = NULL;
489 self->nt = NULL;
490 }
490 }
491 }
491 }
492
492
493 static PyObject *index_clearcaches(indexObject *self)
493 static PyObject *index_clearcaches(indexObject *self)
494 {
494 {
495 _index_clearcaches(self);
495 _index_clearcaches(self);
496 self->ntlength = self->ntcapacity = 0;
496 self->ntlength = self->ntcapacity = 0;
497 self->ntdepth = self->ntsplits = 0;
497 self->ntdepth = self->ntsplits = 0;
498 self->ntrev = -1;
498 self->ntrev = -1;
499 self->ntlookups = self->ntmisses = 0;
499 self->ntlookups = self->ntmisses = 0;
500 Py_RETURN_NONE;
500 Py_RETURN_NONE;
501 }
501 }
502
502
503 static PyObject *index_stats(indexObject *self)
503 static PyObject *index_stats(indexObject *self)
504 {
504 {
505 PyObject *obj = PyDict_New();
505 PyObject *obj = PyDict_New();
506
506
507 if (obj == NULL)
507 if (obj == NULL)
508 return NULL;
508 return NULL;
509
509
510 #define istat(__n, __d) \
510 #define istat(__n, __d) \
511 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
511 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
512 goto bail;
512 goto bail;
513
513
514 if (self->added) {
514 if (self->added) {
515 Py_ssize_t len = PyList_GET_SIZE(self->added);
515 Py_ssize_t len = PyList_GET_SIZE(self->added);
516 if (PyDict_SetItemString(obj, "index entries added",
516 if (PyDict_SetItemString(obj, "index entries added",
517 PyInt_FromSsize_t(len)) == -1)
517 PyInt_FromSsize_t(len)) == -1)
518 goto bail;
518 goto bail;
519 }
519 }
520
520
521 if (self->raw_length != self->length - 1)
521 if (self->raw_length != self->length - 1)
522 istat(raw_length, "revs on disk");
522 istat(raw_length, "revs on disk");
523 istat(length, "revs in memory");
523 istat(length, "revs in memory");
524 istat(ntcapacity, "node trie capacity");
524 istat(ntcapacity, "node trie capacity");
525 istat(ntdepth, "node trie depth");
525 istat(ntdepth, "node trie depth");
526 istat(ntlength, "node trie count");
526 istat(ntlength, "node trie count");
527 istat(ntlookups, "node trie lookups");
527 istat(ntlookups, "node trie lookups");
528 istat(ntmisses, "node trie misses");
528 istat(ntmisses, "node trie misses");
529 istat(ntrev, "node trie last rev scanned");
529 istat(ntrev, "node trie last rev scanned");
530 istat(ntsplits, "node trie splits");
530 istat(ntsplits, "node trie splits");
531
531
532 #undef istat
532 #undef istat
533
533
534 return obj;
534 return obj;
535
535
536 bail:
536 bail:
537 Py_XDECREF(obj);
537 Py_XDECREF(obj);
538 return NULL;
538 return NULL;
539 }
539 }
540
540
541 static inline int nt_level(const char *node, Py_ssize_t level)
541 static inline int nt_level(const char *node, Py_ssize_t level)
542 {
542 {
543 int v = node[level>>1];
543 int v = node[level>>1];
544 if (!(level & 1))
544 if (!(level & 1))
545 v >>= 4;
545 v >>= 4;
546 return v & 0xf;
546 return v & 0xf;
547 }
547 }
548
548
549 /*
549 /*
550 * Return values:
550 * Return values:
551 *
551 *
552 * -4: match is ambiguous (multiple candidates)
552 * -4: match is ambiguous (multiple candidates)
553 * -2: not found
553 * -2: not found
554 * rest: valid rev
554 * rest: valid rev
555 */
555 */
556 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
556 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
557 int hex)
557 int hex)
558 {
558 {
559 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
559 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
560 int level, maxlevel, off;
560 int level, maxlevel, off;
561
561
562 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
562 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
563 return -1;
563 return -1;
564
564
565 if (self->nt == NULL)
565 if (self->nt == NULL)
566 return -2;
566 return -2;
567
567
568 if (hex)
568 if (hex)
569 maxlevel = nodelen > 40 ? 40 : nodelen;
569 maxlevel = nodelen > 40 ? 40 : nodelen;
570 else
570 else
571 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
571 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
572
572
573 for (level = off = 0; level < maxlevel; level++) {
573 for (level = off = 0; level < maxlevel; level++) {
574 int k = getnybble(node, level);
574 int k = getnybble(node, level);
575 nodetree *n = &self->nt[off];
575 nodetree *n = &self->nt[off];
576 int v = n->children[k];
576 int v = n->children[k];
577
577
578 if (v < 0) {
578 if (v < 0) {
579 const char *n;
579 const char *n;
580 Py_ssize_t i;
580 Py_ssize_t i;
581
581
582 v = -v - 1;
582 v = -v - 1;
583 n = index_node(self, v);
583 n = index_node(self, v);
584 if (n == NULL)
584 if (n == NULL)
585 return -2;
585 return -2;
586 for (i = level; i < maxlevel; i++)
586 for (i = level; i < maxlevel; i++)
587 if (getnybble(node, i) != nt_level(n, i))
587 if (getnybble(node, i) != nt_level(n, i))
588 return -2;
588 return -2;
589 return v;
589 return v;
590 }
590 }
591 if (v == 0)
591 if (v == 0)
592 return -2;
592 return -2;
593 off = v;
593 off = v;
594 }
594 }
595 /* multiple matches against an ambiguous prefix */
595 /* multiple matches against an ambiguous prefix */
596 return -4;
596 return -4;
597 }
597 }
598
598
599 static int nt_new(indexObject *self)
599 static int nt_new(indexObject *self)
600 {
600 {
601 if (self->ntlength == self->ntcapacity) {
601 if (self->ntlength == self->ntcapacity) {
602 self->ntcapacity *= 2;
602 self->ntcapacity *= 2;
603 self->nt = realloc(self->nt,
603 self->nt = realloc(self->nt,
604 self->ntcapacity * sizeof(nodetree));
604 self->ntcapacity * sizeof(nodetree));
605 if (self->nt == NULL) {
605 if (self->nt == NULL) {
606 PyErr_SetString(PyExc_MemoryError, "out of memory");
606 PyErr_SetString(PyExc_MemoryError, "out of memory");
607 return -1;
607 return -1;
608 }
608 }
609 memset(&self->nt[self->ntlength], 0,
609 memset(&self->nt[self->ntlength], 0,
610 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
610 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
611 }
611 }
612 return self->ntlength++;
612 return self->ntlength++;
613 }
613 }
614
614
615 static int nt_insert(indexObject *self, const char *node, int rev)
615 static int nt_insert(indexObject *self, const char *node, int rev)
616 {
616 {
617 int level = 0;
617 int level = 0;
618 int off = 0;
618 int off = 0;
619
619
620 while (level < 40) {
620 while (level < 40) {
621 int k = nt_level(node, level);
621 int k = nt_level(node, level);
622 nodetree *n;
622 nodetree *n;
623 int v;
623 int v;
624
624
625 n = &self->nt[off];
625 n = &self->nt[off];
626 v = n->children[k];
626 v = n->children[k];
627
627
628 if (v == 0) {
628 if (v == 0) {
629 n->children[k] = -rev - 1;
629 n->children[k] = -rev - 1;
630 return 0;
630 return 0;
631 }
631 }
632 if (v < 0) {
632 if (v < 0) {
633 const char *oldnode = index_node(self, -v - 1);
633 const char *oldnode = index_node(self, -v - 1);
634 int noff;
634 int noff;
635
635
636 if (!oldnode || !memcmp(oldnode, node, 20)) {
636 if (!oldnode || !memcmp(oldnode, node, 20)) {
637 n->children[k] = -rev - 1;
637 n->children[k] = -rev - 1;
638 return 0;
638 return 0;
639 }
639 }
640 noff = nt_new(self);
640 noff = nt_new(self);
641 if (noff == -1)
641 if (noff == -1)
642 return -1;
642 return -1;
643 /* self->nt may have been changed by realloc */
643 /* self->nt may have been changed by realloc */
644 self->nt[off].children[k] = noff;
644 self->nt[off].children[k] = noff;
645 off = noff;
645 off = noff;
646 n = &self->nt[off];
646 n = &self->nt[off];
647 n->children[nt_level(oldnode, ++level)] = v;
647 n->children[nt_level(oldnode, ++level)] = v;
648 if (level > self->ntdepth)
648 if (level > self->ntdepth)
649 self->ntdepth = level;
649 self->ntdepth = level;
650 self->ntsplits += 1;
650 self->ntsplits += 1;
651 } else {
651 } else {
652 level += 1;
652 level += 1;
653 off = v;
653 off = v;
654 }
654 }
655 }
655 }
656
656
657 return -1;
657 return -1;
658 }
658 }
659
659
660 static int nt_init(indexObject *self)
660 static int nt_init(indexObject *self)
661 {
661 {
662 if (self->nt == NULL) {
662 if (self->nt == NULL) {
663 self->ntcapacity = self->raw_length < 4
663 self->ntcapacity = self->raw_length < 4
664 ? 4 : self->raw_length / 2;
664 ? 4 : self->raw_length / 2;
665 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
665 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
666 if (self->nt == NULL) {
666 if (self->nt == NULL) {
667 PyErr_NoMemory();
667 PyErr_NoMemory();
668 return -1;
668 return -1;
669 }
669 }
670 self->ntlength = 1;
670 self->ntlength = 1;
671 self->ntrev = (int)index_length(self) - 1;
671 self->ntrev = (int)index_length(self) - 1;
672 self->ntlookups = 1;
672 self->ntlookups = 1;
673 self->ntmisses = 0;
673 self->ntmisses = 0;
674 if (nt_insert(self, nullid, INT_MAX) == -1)
675 return -1;
674 }
676 }
675 return 0;
677 return 0;
676 }
678 }
677
679
678 /*
680 /*
679 * Return values:
681 * Return values:
680 *
682 *
681 * -3: error (exception set)
683 * -3: error (exception set)
682 * -2: not found (no exception set)
684 * -2: not found (no exception set)
683 * rest: valid rev
685 * rest: valid rev
684 */
686 */
685 static int index_find_node(indexObject *self,
687 static int index_find_node(indexObject *self,
686 const char *node, Py_ssize_t nodelen)
688 const char *node, Py_ssize_t nodelen)
687 {
689 {
688 int rev;
690 int rev;
689
691
690 self->ntlookups++;
692 self->ntlookups++;
691 rev = nt_find(self, node, nodelen, 0);
693 rev = nt_find(self, node, nodelen, 0);
692 if (rev >= -1)
694 if (rev >= -1)
693 return rev;
695 return rev;
694
696
695 if (nt_init(self) == -1)
697 if (nt_init(self) == -1)
696 return -3;
698 return -3;
697
699
698 /*
700 /*
699 * For the first handful of lookups, we scan the entire index,
701 * For the first handful of lookups, we scan the entire index,
700 * and cache only the matching nodes. This optimizes for cases
702 * and cache only the matching nodes. This optimizes for cases
701 * like "hg tip", where only a few nodes are accessed.
703 * like "hg tip", where only a few nodes are accessed.
702 *
704 *
703 * After that, we cache every node we visit, using a single
705 * After that, we cache every node we visit, using a single
704 * scan amortized over multiple lookups. This gives the best
706 * scan amortized over multiple lookups. This gives the best
705 * bulk performance, e.g. for "hg log".
707 * bulk performance, e.g. for "hg log".
706 */
708 */
707 if (self->ntmisses++ < 4) {
709 if (self->ntmisses++ < 4) {
708 for (rev = self->ntrev - 1; rev >= 0; rev--) {
710 for (rev = self->ntrev - 1; rev >= 0; rev--) {
709 const char *n = index_node(self, rev);
711 const char *n = index_node(self, rev);
710 if (n == NULL)
712 if (n == NULL)
711 return -2;
713 return -2;
712 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
714 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
713 if (nt_insert(self, n, rev) == -1)
715 if (nt_insert(self, n, rev) == -1)
714 return -3;
716 return -3;
715 break;
717 break;
716 }
718 }
717 }
719 }
718 } else {
720 } else {
719 for (rev = self->ntrev - 1; rev >= 0; rev--) {
721 for (rev = self->ntrev - 1; rev >= 0; rev--) {
720 const char *n = index_node(self, rev);
722 const char *n = index_node(self, rev);
721 if (n == NULL) {
723 if (n == NULL) {
722 self->ntrev = rev + 1;
724 self->ntrev = rev + 1;
723 return -2;
725 return -2;
724 }
726 }
725 if (nt_insert(self, n, rev) == -1) {
727 if (nt_insert(self, n, rev) == -1) {
726 self->ntrev = rev + 1;
728 self->ntrev = rev + 1;
727 return -3;
729 return -3;
728 }
730 }
729 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
731 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
730 break;
732 break;
731 }
733 }
732 }
734 }
733 self->ntrev = rev;
735 self->ntrev = rev;
734 }
736 }
735
737
736 if (rev >= 0)
738 if (rev >= 0)
737 return rev;
739 return rev;
738 return -2;
740 return -2;
739 }
741 }
740
742
741 static PyObject *raise_revlog_error(void)
743 static PyObject *raise_revlog_error(void)
742 {
744 {
743 static PyObject *errclass;
745 static PyObject *errclass;
744 PyObject *mod = NULL, *errobj;
746 PyObject *mod = NULL, *errobj;
745
747
746 if (errclass == NULL) {
748 if (errclass == NULL) {
747 PyObject *dict;
749 PyObject *dict;
748
750
749 mod = PyImport_ImportModule("mercurial.error");
751 mod = PyImport_ImportModule("mercurial.error");
750 if (mod == NULL)
752 if (mod == NULL)
751 goto classfail;
753 goto classfail;
752
754
753 dict = PyModule_GetDict(mod);
755 dict = PyModule_GetDict(mod);
754 if (dict == NULL)
756 if (dict == NULL)
755 goto classfail;
757 goto classfail;
756
758
757 errclass = PyDict_GetItemString(dict, "RevlogError");
759 errclass = PyDict_GetItemString(dict, "RevlogError");
758 if (errclass == NULL) {
760 if (errclass == NULL) {
759 PyErr_SetString(PyExc_SystemError,
761 PyErr_SetString(PyExc_SystemError,
760 "could not find RevlogError");
762 "could not find RevlogError");
761 goto classfail;
763 goto classfail;
762 }
764 }
763 Py_INCREF(errclass);
765 Py_INCREF(errclass);
764 }
766 }
765
767
766 errobj = PyObject_CallFunction(errclass, NULL);
768 errobj = PyObject_CallFunction(errclass, NULL);
767 if (errobj == NULL)
769 if (errobj == NULL)
768 return NULL;
770 return NULL;
769 PyErr_SetObject(errclass, errobj);
771 PyErr_SetObject(errclass, errobj);
770 return errobj;
772 return errobj;
771
773
772 classfail:
774 classfail:
773 Py_XDECREF(mod);
775 Py_XDECREF(mod);
774 return NULL;
776 return NULL;
775 }
777 }
776
778
777 static PyObject *index_getitem(indexObject *self, PyObject *value)
779 static PyObject *index_getitem(indexObject *self, PyObject *value)
778 {
780 {
779 char *node;
781 char *node;
780 Py_ssize_t nodelen;
782 Py_ssize_t nodelen;
781 int rev;
783 int rev;
782
784
783 if (PyInt_Check(value))
785 if (PyInt_Check(value))
784 return index_get(self, PyInt_AS_LONG(value));
786 return index_get(self, PyInt_AS_LONG(value));
785
787
786 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
788 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
787 return NULL;
789 return NULL;
788 rev = index_find_node(self, node, nodelen);
790 rev = index_find_node(self, node, nodelen);
789 if (rev >= -1)
791 if (rev >= -1)
790 return PyInt_FromLong(rev);
792 return PyInt_FromLong(rev);
791 if (rev == -2)
793 if (rev == -2)
792 raise_revlog_error();
794 raise_revlog_error();
793 return NULL;
795 return NULL;
794 }
796 }
795
797
796 static PyObject *index_m_get(indexObject *self, PyObject *args)
798 static PyObject *index_m_get(indexObject *self, PyObject *args)
797 {
799 {
798 char *node;
800 char *node;
799 int nodelen, rev;
801 int nodelen, rev;
800
802
801 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
803 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
802 return NULL;
804 return NULL;
803
805
804 rev = index_find_node(self, node, nodelen);
806 rev = index_find_node(self, node, nodelen);
805 if (rev == -3)
807 if (rev == -3)
806 return NULL;
808 return NULL;
807 if (rev == -2)
809 if (rev == -2)
808 Py_RETURN_NONE;
810 Py_RETURN_NONE;
809 return PyInt_FromLong(rev);
811 return PyInt_FromLong(rev);
810 }
812 }
811
813
812 static int index_contains(indexObject *self, PyObject *value)
814 static int index_contains(indexObject *self, PyObject *value)
813 {
815 {
814 char *node;
816 char *node;
815 Py_ssize_t nodelen;
817 Py_ssize_t nodelen;
816
818
817 if (PyInt_Check(value)) {
819 if (PyInt_Check(value)) {
818 long rev = PyInt_AS_LONG(value);
820 long rev = PyInt_AS_LONG(value);
819 return rev >= -1 && rev < index_length(self);
821 return rev >= -1 && rev < index_length(self);
820 }
822 }
821
823
822 if (!PyString_Check(value))
824 if (!PyString_Check(value))
823 return 0;
825 return 0;
824
826
825 node = PyString_AS_STRING(value);
827 node = PyString_AS_STRING(value);
826 nodelen = PyString_GET_SIZE(value);
828 nodelen = PyString_GET_SIZE(value);
827
829
828 switch (index_find_node(self, node, nodelen)) {
830 switch (index_find_node(self, node, nodelen)) {
829 case -3:
831 case -3:
830 return -1;
832 return -1;
831 case -2:
833 case -2:
832 return 0;
834 return 0;
833 default:
835 default:
834 return 1;
836 return 1;
835 }
837 }
836 }
838 }
837
839
838 /*
840 /*
839 * Invalidate any trie entries introduced by added revs.
841 * Invalidate any trie entries introduced by added revs.
840 */
842 */
841 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
843 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
842 {
844 {
843 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
845 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
844
846
845 for (i = start; i < len; i++) {
847 for (i = start; i < len; i++) {
846 PyObject *tuple = PyList_GET_ITEM(self->added, i);
848 PyObject *tuple = PyList_GET_ITEM(self->added, i);
847 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
849 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
848
850
849 nt_insert(self, PyString_AS_STRING(node), -1);
851 nt_insert(self, PyString_AS_STRING(node), -1);
850 }
852 }
851
853
852 if (start == 0) {
854 if (start == 0) {
853 Py_DECREF(self->added);
855 Py_DECREF(self->added);
854 self->added = NULL;
856 self->added = NULL;
855 }
857 }
856 }
858 }
857
859
858 /*
860 /*
859 * Delete a numeric range of revs, which must be at the end of the
861 * Delete a numeric range of revs, which must be at the end of the
860 * range, but exclude the sentinel nullid entry.
862 * range, but exclude the sentinel nullid entry.
861 */
863 */
862 static int index_slice_del(indexObject *self, PyObject *item)
864 static int index_slice_del(indexObject *self, PyObject *item)
863 {
865 {
864 Py_ssize_t start, stop, step, slicelength;
866 Py_ssize_t start, stop, step, slicelength;
865 Py_ssize_t length = index_length(self);
867 Py_ssize_t length = index_length(self);
866
868
867 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
869 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
868 &start, &stop, &step, &slicelength) < 0)
870 &start, &stop, &step, &slicelength) < 0)
869 return -1;
871 return -1;
870
872
871 if (slicelength <= 0)
873 if (slicelength <= 0)
872 return 0;
874 return 0;
873
875
874 if ((step < 0 && start < stop) || (step > 0 && start > stop))
876 if ((step < 0 && start < stop) || (step > 0 && start > stop))
875 stop = start;
877 stop = start;
876
878
877 if (step < 0) {
879 if (step < 0) {
878 stop = start + 1;
880 stop = start + 1;
879 start = stop + step*(slicelength - 1) - 1;
881 start = stop + step*(slicelength - 1) - 1;
880 step = -step;
882 step = -step;
881 }
883 }
882
884
883 if (step != 1) {
885 if (step != 1) {
884 PyErr_SetString(PyExc_ValueError,
886 PyErr_SetString(PyExc_ValueError,
885 "revlog index delete requires step size of 1");
887 "revlog index delete requires step size of 1");
886 return -1;
888 return -1;
887 }
889 }
888
890
889 if (stop != length - 1) {
891 if (stop != length - 1) {
890 PyErr_SetString(PyExc_IndexError,
892 PyErr_SetString(PyExc_IndexError,
891 "revlog index deletion indices are invalid");
893 "revlog index deletion indices are invalid");
892 return -1;
894 return -1;
893 }
895 }
894
896
895 if (start < self->length - 1) {
897 if (start < self->length - 1) {
896 if (self->nt) {
898 if (self->nt) {
897 Py_ssize_t i;
899 Py_ssize_t i;
898
900
899 for (i = start + 1; i < self->length - 1; i++) {
901 for (i = start + 1; i < self->length - 1; i++) {
900 const char *node = index_node(self, i);
902 const char *node = index_node(self, i);
901
903
902 if (node)
904 if (node)
903 nt_insert(self, node, -1);
905 nt_insert(self, node, -1);
904 }
906 }
905 if (self->added)
907 if (self->added)
906 nt_invalidate_added(self, 0);
908 nt_invalidate_added(self, 0);
907 if (self->ntrev > start)
909 if (self->ntrev > start)
908 self->ntrev = (int)start;
910 self->ntrev = (int)start;
909 }
911 }
910 self->length = start + 1;
912 self->length = start + 1;
911 return 0;
913 return 0;
912 }
914 }
913
915
914 if (self->nt) {
916 if (self->nt) {
915 nt_invalidate_added(self, start - self->length + 1);
917 nt_invalidate_added(self, start - self->length + 1);
916 if (self->ntrev > start)
918 if (self->ntrev > start)
917 self->ntrev = (int)start;
919 self->ntrev = (int)start;
918 }
920 }
919 return self->added
921 return self->added
920 ? PyList_SetSlice(self->added, start - self->length + 1,
922 ? PyList_SetSlice(self->added, start - self->length + 1,
921 PyList_GET_SIZE(self->added), NULL)
923 PyList_GET_SIZE(self->added), NULL)
922 : 0;
924 : 0;
923 }
925 }
924
926
925 /*
927 /*
926 * Supported ops:
928 * Supported ops:
927 *
929 *
928 * slice deletion
930 * slice deletion
929 * string assignment (extend node->rev mapping)
931 * string assignment (extend node->rev mapping)
930 * string deletion (shrink node->rev mapping)
932 * string deletion (shrink node->rev mapping)
931 */
933 */
932 static int index_assign_subscript(indexObject *self, PyObject *item,
934 static int index_assign_subscript(indexObject *self, PyObject *item,
933 PyObject *value)
935 PyObject *value)
934 {
936 {
935 char *node;
937 char *node;
936 Py_ssize_t nodelen;
938 Py_ssize_t nodelen;
937 long rev;
939 long rev;
938
940
939 if (PySlice_Check(item) && value == NULL)
941 if (PySlice_Check(item) && value == NULL)
940 return index_slice_del(self, item);
942 return index_slice_del(self, item);
941
943
942 if (node_check(item, &node, &nodelen) == -1)
944 if (node_check(item, &node, &nodelen) == -1)
943 return -1;
945 return -1;
944
946
945 if (value == NULL)
947 if (value == NULL)
946 return self->nt ? nt_insert(self, node, -1) : 0;
948 return self->nt ? nt_insert(self, node, -1) : 0;
947 rev = PyInt_AsLong(value);
949 rev = PyInt_AsLong(value);
948 if (rev > INT_MAX || rev < 0) {
950 if (rev > INT_MAX || rev < 0) {
949 if (!PyErr_Occurred())
951 if (!PyErr_Occurred())
950 PyErr_SetString(PyExc_ValueError, "rev out of range");
952 PyErr_SetString(PyExc_ValueError, "rev out of range");
951 return -1;
953 return -1;
952 }
954 }
953 return nt_insert(self, node, (int)rev);
955 return nt_insert(self, node, (int)rev);
954 }
956 }
955
957
956 /*
958 /*
957 * Find all RevlogNG entries in an index that has inline data. Update
959 * Find all RevlogNG entries in an index that has inline data. Update
958 * the optional "offsets" table with those entries.
960 * the optional "offsets" table with those entries.
959 */
961 */
960 static long inline_scan(indexObject *self, const char **offsets)
962 static long inline_scan(indexObject *self, const char **offsets)
961 {
963 {
962 const char *data = PyString_AS_STRING(self->data);
964 const char *data = PyString_AS_STRING(self->data);
963 const char *end = data + PyString_GET_SIZE(self->data);
965 const char *end = data + PyString_GET_SIZE(self->data);
964 const long hdrsize = 64;
966 const long hdrsize = 64;
965 long incr = hdrsize;
967 long incr = hdrsize;
966 Py_ssize_t len = 0;
968 Py_ssize_t len = 0;
967
969
968 while (data + hdrsize <= end) {
970 while (data + hdrsize <= end) {
969 uint32_t comp_len;
971 uint32_t comp_len;
970 const char *old_data;
972 const char *old_data;
971 /* 3rd element of header is length of compressed inline data */
973 /* 3rd element of header is length of compressed inline data */
972 comp_len = getbe32(data + 8);
974 comp_len = getbe32(data + 8);
973 incr = hdrsize + comp_len;
975 incr = hdrsize + comp_len;
974 if (incr < hdrsize)
976 if (incr < hdrsize)
975 break;
977 break;
976 if (offsets)
978 if (offsets)
977 offsets[len] = data;
979 offsets[len] = data;
978 len++;
980 len++;
979 old_data = data;
981 old_data = data;
980 data += incr;
982 data += incr;
981 if (data <= old_data)
983 if (data <= old_data)
982 break;
984 break;
983 }
985 }
984
986
985 if (data != end && data + hdrsize != end) {
987 if (data != end && data + hdrsize != end) {
986 if (!PyErr_Occurred())
988 if (!PyErr_Occurred())
987 PyErr_SetString(PyExc_ValueError, "corrupt index file");
989 PyErr_SetString(PyExc_ValueError, "corrupt index file");
988 return -1;
990 return -1;
989 }
991 }
990
992
991 return len;
993 return len;
992 }
994 }
993
995
994 static int index_init(indexObject *self, PyObject *args)
996 static int index_init(indexObject *self, PyObject *args)
995 {
997 {
996 PyObject *data_obj, *inlined_obj;
998 PyObject *data_obj, *inlined_obj;
997 Py_ssize_t size;
999 Py_ssize_t size;
998
1000
999 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1001 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1000 return -1;
1002 return -1;
1001 if (!PyString_Check(data_obj)) {
1003 if (!PyString_Check(data_obj)) {
1002 PyErr_SetString(PyExc_TypeError, "data is not a string");
1004 PyErr_SetString(PyExc_TypeError, "data is not a string");
1003 return -1;
1005 return -1;
1004 }
1006 }
1005 size = PyString_GET_SIZE(data_obj);
1007 size = PyString_GET_SIZE(data_obj);
1006
1008
1007 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1009 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1008 self->data = data_obj;
1010 self->data = data_obj;
1009 self->cache = NULL;
1011 self->cache = NULL;
1010
1012
1011 self->added = NULL;
1013 self->added = NULL;
1012 self->offsets = NULL;
1014 self->offsets = NULL;
1013 self->nt = NULL;
1015 self->nt = NULL;
1014 self->ntlength = self->ntcapacity = 0;
1016 self->ntlength = self->ntcapacity = 0;
1015 self->ntdepth = self->ntsplits = 0;
1017 self->ntdepth = self->ntsplits = 0;
1016 self->ntlookups = self->ntmisses = 0;
1018 self->ntlookups = self->ntmisses = 0;
1017 self->ntrev = -1;
1019 self->ntrev = -1;
1018 Py_INCREF(self->data);
1020 Py_INCREF(self->data);
1019
1021
1020 if (self->inlined) {
1022 if (self->inlined) {
1021 long len = inline_scan(self, NULL);
1023 long len = inline_scan(self, NULL);
1022 if (len == -1)
1024 if (len == -1)
1023 goto bail;
1025 goto bail;
1024 self->raw_length = len;
1026 self->raw_length = len;
1025 self->length = len + 1;
1027 self->length = len + 1;
1026 } else {
1028 } else {
1027 if (size % 64) {
1029 if (size % 64) {
1028 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1030 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1029 goto bail;
1031 goto bail;
1030 }
1032 }
1031 self->raw_length = size / 64;
1033 self->raw_length = size / 64;
1032 self->length = self->raw_length + 1;
1034 self->length = self->raw_length + 1;
1033 }
1035 }
1034
1036
1035 return 0;
1037 return 0;
1036 bail:
1038 bail:
1037 return -1;
1039 return -1;
1038 }
1040 }
1039
1041
1040 static PyObject *index_nodemap(indexObject *self)
1042 static PyObject *index_nodemap(indexObject *self)
1041 {
1043 {
1042 Py_INCREF(self);
1044 Py_INCREF(self);
1043 return (PyObject *)self;
1045 return (PyObject *)self;
1044 }
1046 }
1045
1047
1046 static void index_dealloc(indexObject *self)
1048 static void index_dealloc(indexObject *self)
1047 {
1049 {
1048 _index_clearcaches(self);
1050 _index_clearcaches(self);
1049 Py_DECREF(self->data);
1051 Py_DECREF(self->data);
1050 Py_XDECREF(self->added);
1052 Py_XDECREF(self->added);
1051 PyObject_Del(self);
1053 PyObject_Del(self);
1052 }
1054 }
1053
1055
1054 static PySequenceMethods index_sequence_methods = {
1056 static PySequenceMethods index_sequence_methods = {
1055 (lenfunc)index_length, /* sq_length */
1057 (lenfunc)index_length, /* sq_length */
1056 0, /* sq_concat */
1058 0, /* sq_concat */
1057 0, /* sq_repeat */
1059 0, /* sq_repeat */
1058 (ssizeargfunc)index_get, /* sq_item */
1060 (ssizeargfunc)index_get, /* sq_item */
1059 0, /* sq_slice */
1061 0, /* sq_slice */
1060 0, /* sq_ass_item */
1062 0, /* sq_ass_item */
1061 0, /* sq_ass_slice */
1063 0, /* sq_ass_slice */
1062 (objobjproc)index_contains, /* sq_contains */
1064 (objobjproc)index_contains, /* sq_contains */
1063 };
1065 };
1064
1066
1065 static PyMappingMethods index_mapping_methods = {
1067 static PyMappingMethods index_mapping_methods = {
1066 (lenfunc)index_length, /* mp_length */
1068 (lenfunc)index_length, /* mp_length */
1067 (binaryfunc)index_getitem, /* mp_subscript */
1069 (binaryfunc)index_getitem, /* mp_subscript */
1068 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1070 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1069 };
1071 };
1070
1072
1071 static PyMethodDef index_methods[] = {
1073 static PyMethodDef index_methods[] = {
1072 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1074 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1073 "clear the index caches"},
1075 "clear the index caches"},
1074 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1076 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1075 "get an index entry"},
1077 "get an index entry"},
1076 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1078 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1077 "insert an index entry"},
1079 "insert an index entry"},
1078 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1080 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1079 "stats for the index"},
1081 "stats for the index"},
1080 {NULL} /* Sentinel */
1082 {NULL} /* Sentinel */
1081 };
1083 };
1082
1084
1083 static PyGetSetDef index_getset[] = {
1085 static PyGetSetDef index_getset[] = {
1084 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1086 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1085 {NULL} /* Sentinel */
1087 {NULL} /* Sentinel */
1086 };
1088 };
1087
1089
1088 static PyTypeObject indexType = {
1090 static PyTypeObject indexType = {
1089 PyObject_HEAD_INIT(NULL)
1091 PyObject_HEAD_INIT(NULL)
1090 0, /* ob_size */
1092 0, /* ob_size */
1091 "parsers.index", /* tp_name */
1093 "parsers.index", /* tp_name */
1092 sizeof(indexObject), /* tp_basicsize */
1094 sizeof(indexObject), /* tp_basicsize */
1093 0, /* tp_itemsize */
1095 0, /* tp_itemsize */
1094 (destructor)index_dealloc, /* tp_dealloc */
1096 (destructor)index_dealloc, /* tp_dealloc */
1095 0, /* tp_print */
1097 0, /* tp_print */
1096 0, /* tp_getattr */
1098 0, /* tp_getattr */
1097 0, /* tp_setattr */
1099 0, /* tp_setattr */
1098 0, /* tp_compare */
1100 0, /* tp_compare */
1099 0, /* tp_repr */
1101 0, /* tp_repr */
1100 0, /* tp_as_number */
1102 0, /* tp_as_number */
1101 &index_sequence_methods, /* tp_as_sequence */
1103 &index_sequence_methods, /* tp_as_sequence */
1102 &index_mapping_methods, /* tp_as_mapping */
1104 &index_mapping_methods, /* tp_as_mapping */
1103 0, /* tp_hash */
1105 0, /* tp_hash */
1104 0, /* tp_call */
1106 0, /* tp_call */
1105 0, /* tp_str */
1107 0, /* tp_str */
1106 0, /* tp_getattro */
1108 0, /* tp_getattro */
1107 0, /* tp_setattro */
1109 0, /* tp_setattro */
1108 0, /* tp_as_buffer */
1110 0, /* tp_as_buffer */
1109 Py_TPFLAGS_DEFAULT, /* tp_flags */
1111 Py_TPFLAGS_DEFAULT, /* tp_flags */
1110 "revlog index", /* tp_doc */
1112 "revlog index", /* tp_doc */
1111 0, /* tp_traverse */
1113 0, /* tp_traverse */
1112 0, /* tp_clear */
1114 0, /* tp_clear */
1113 0, /* tp_richcompare */
1115 0, /* tp_richcompare */
1114 0, /* tp_weaklistoffset */
1116 0, /* tp_weaklistoffset */
1115 0, /* tp_iter */
1117 0, /* tp_iter */
1116 0, /* tp_iternext */
1118 0, /* tp_iternext */
1117 index_methods, /* tp_methods */
1119 index_methods, /* tp_methods */
1118 0, /* tp_members */
1120 0, /* tp_members */
1119 index_getset, /* tp_getset */
1121 index_getset, /* tp_getset */
1120 0, /* tp_base */
1122 0, /* tp_base */
1121 0, /* tp_dict */
1123 0, /* tp_dict */
1122 0, /* tp_descr_get */
1124 0, /* tp_descr_get */
1123 0, /* tp_descr_set */
1125 0, /* tp_descr_set */
1124 0, /* tp_dictoffset */
1126 0, /* tp_dictoffset */
1125 (initproc)index_init, /* tp_init */
1127 (initproc)index_init, /* tp_init */
1126 0, /* tp_alloc */
1128 0, /* tp_alloc */
1127 };
1129 };
1128
1130
1129 /*
1131 /*
1130 * returns a tuple of the form (index, index, cache) with elements as
1132 * returns a tuple of the form (index, index, cache) with elements as
1131 * follows:
1133 * follows:
1132 *
1134 *
1133 * index: an index object that lazily parses RevlogNG records
1135 * index: an index object that lazily parses RevlogNG records
1134 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1136 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1135 *
1137 *
1136 * added complications are for backwards compatibility
1138 * added complications are for backwards compatibility
1137 */
1139 */
1138 static PyObject *parse_index2(PyObject *self, PyObject *args)
1140 static PyObject *parse_index2(PyObject *self, PyObject *args)
1139 {
1141 {
1140 PyObject *tuple = NULL, *cache = NULL;
1142 PyObject *tuple = NULL, *cache = NULL;
1141 indexObject *idx;
1143 indexObject *idx;
1142 int ret;
1144 int ret;
1143
1145
1144 idx = PyObject_New(indexObject, &indexType);
1146 idx = PyObject_New(indexObject, &indexType);
1145 if (idx == NULL)
1147 if (idx == NULL)
1146 goto bail;
1148 goto bail;
1147
1149
1148 ret = index_init(idx, args);
1150 ret = index_init(idx, args);
1149 if (ret == -1)
1151 if (ret == -1)
1150 goto bail;
1152 goto bail;
1151
1153
1152 if (idx->inlined) {
1154 if (idx->inlined) {
1153 cache = Py_BuildValue("iO", 0, idx->data);
1155 cache = Py_BuildValue("iO", 0, idx->data);
1154 if (cache == NULL)
1156 if (cache == NULL)
1155 goto bail;
1157 goto bail;
1156 } else {
1158 } else {
1157 cache = Py_None;
1159 cache = Py_None;
1158 Py_INCREF(cache);
1160 Py_INCREF(cache);
1159 }
1161 }
1160
1162
1161 tuple = Py_BuildValue("NN", idx, cache);
1163 tuple = Py_BuildValue("NN", idx, cache);
1162 if (!tuple)
1164 if (!tuple)
1163 goto bail;
1165 goto bail;
1164 return tuple;
1166 return tuple;
1165
1167
1166 bail:
1168 bail:
1167 Py_XDECREF(idx);
1169 Py_XDECREF(idx);
1168 Py_XDECREF(cache);
1170 Py_XDECREF(cache);
1169 Py_XDECREF(tuple);
1171 Py_XDECREF(tuple);
1170 return NULL;
1172 return NULL;
1171 }
1173 }
1172
1174
1173 static char parsers_doc[] = "Efficient content parsing.";
1175 static char parsers_doc[] = "Efficient content parsing.";
1174
1176
1175 static PyMethodDef methods[] = {
1177 static PyMethodDef methods[] = {
1176 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1178 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1177 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1179 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1178 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1180 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1179 {NULL, NULL}
1181 {NULL, NULL}
1180 };
1182 };
1181
1183
1182 static void module_init(PyObject *mod)
1184 static void module_init(PyObject *mod)
1183 {
1185 {
1184 indexType.tp_new = PyType_GenericNew;
1186 indexType.tp_new = PyType_GenericNew;
1185 if (PyType_Ready(&indexType) < 0)
1187 if (PyType_Ready(&indexType) < 0)
1186 return;
1188 return;
1187 Py_INCREF(&indexType);
1189 Py_INCREF(&indexType);
1188
1190
1189 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1191 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1190
1192
1191 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1193 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1192 -1, -1, -1, -1, nullid, 20);
1194 -1, -1, -1, -1, nullid, 20);
1193 if (nullentry)
1195 if (nullentry)
1194 PyObject_GC_UnTrack(nullentry);
1196 PyObject_GC_UnTrack(nullentry);
1195 }
1197 }
1196
1198
1197 #ifdef IS_PY3K
1199 #ifdef IS_PY3K
1198 static struct PyModuleDef parsers_module = {
1200 static struct PyModuleDef parsers_module = {
1199 PyModuleDef_HEAD_INIT,
1201 PyModuleDef_HEAD_INIT,
1200 "parsers",
1202 "parsers",
1201 parsers_doc,
1203 parsers_doc,
1202 -1,
1204 -1,
1203 methods
1205 methods
1204 };
1206 };
1205
1207
1206 PyMODINIT_FUNC PyInit_parsers(void)
1208 PyMODINIT_FUNC PyInit_parsers(void)
1207 {
1209 {
1208 PyObject *mod = PyModule_Create(&parsers_module);
1210 PyObject *mod = PyModule_Create(&parsers_module);
1209 module_init(mod);
1211 module_init(mod);
1210 return mod;
1212 return mod;
1211 }
1213 }
1212 #else
1214 #else
1213 PyMODINIT_FUNC initparsers(void)
1215 PyMODINIT_FUNC initparsers(void)
1214 {
1216 {
1215 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1217 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1216 module_init(mod);
1218 module_init(mod);
1217 }
1219 }
1218 #endif
1220 #endif
General Comments 0
You need to be logged in to leave comments. Login now