##// END OF EJS Templates
parsers: allow nt_find to signal an ambiguous match
Bryan O'Sullivan -
r16616:8f79aabd default
parent child Browse files
Show More
@@ -1,1197 +1,1205 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
16 static int hexdigit(char c)
16 static int hexdigit(char c)
17 {
17 {
18 if (c >= '0' && c <= '9')
18 if (c >= '0' && c <= '9')
19 return c - '0';
19 return c - '0';
20 if (c >= 'a' && c <= 'f')
20 if (c >= 'a' && c <= 'f')
21 return c - 'a' + 10;
21 return c - 'a' + 10;
22 if (c >= 'A' && c <= 'F')
22 if (c >= 'A' && c <= 'F')
23 return c - 'A' + 10;
23 return c - 'A' + 10;
24
24
25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 return 0;
26 return 0;
27 }
27 }
28
28
29 /*
29 /*
30 * Turn a hex-encoded string into binary.
30 * Turn a hex-encoded string into binary.
31 */
31 */
32 static PyObject *unhexlify(const char *str, int len)
32 static PyObject *unhexlify(const char *str, int len)
33 {
33 {
34 PyObject *ret;
34 PyObject *ret;
35 const char *c;
35 const char *c;
36 char *d;
36 char *d;
37
37
38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39
39
40 if (!ret)
40 if (!ret)
41 return NULL;
41 return NULL;
42
42
43 d = PyBytes_AsString(ret);
43 d = PyBytes_AsString(ret);
44
44
45 for (c = str; c < str + len;) {
45 for (c = str; c < str + len;) {
46 int hi = hexdigit(*c++);
46 int hi = hexdigit(*c++);
47 int lo = hexdigit(*c++);
47 int lo = hexdigit(*c++);
48 *d++ = (hi << 4) | lo;
48 *d++ = (hi << 4) | lo;
49 }
49 }
50
50
51 return ret;
51 return ret;
52 }
52 }
53
53
54 /*
54 /*
55 * This code assumes that a manifest is stitched together with newline
55 * This code assumes that a manifest is stitched together with newline
56 * ('\n') characters.
56 * ('\n') characters.
57 */
57 */
58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 {
59 {
60 PyObject *mfdict, *fdict;
60 PyObject *mfdict, *fdict;
61 char *str, *cur, *start, *zero;
61 char *str, *cur, *start, *zero;
62 int len;
62 int len;
63
63
64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 &PyDict_Type, &mfdict,
65 &PyDict_Type, &mfdict,
66 &PyDict_Type, &fdict,
66 &PyDict_Type, &fdict,
67 &str, &len))
67 &str, &len))
68 goto quit;
68 goto quit;
69
69
70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 PyObject *file = NULL, *node = NULL;
71 PyObject *file = NULL, *node = NULL;
72 PyObject *flags = NULL;
72 PyObject *flags = NULL;
73 int nlen;
73 int nlen;
74
74
75 if (!*cur) {
75 if (!*cur) {
76 zero = cur;
76 zero = cur;
77 continue;
77 continue;
78 }
78 }
79 else if (*cur != '\n')
79 else if (*cur != '\n')
80 continue;
80 continue;
81
81
82 if (!zero) {
82 if (!zero) {
83 PyErr_SetString(PyExc_ValueError,
83 PyErr_SetString(PyExc_ValueError,
84 "manifest entry has no separator");
84 "manifest entry has no separator");
85 goto quit;
85 goto quit;
86 }
86 }
87
87
88 file = PyBytes_FromStringAndSize(start, zero - start);
88 file = PyBytes_FromStringAndSize(start, zero - start);
89
89
90 if (!file)
90 if (!file)
91 goto bail;
91 goto bail;
92
92
93 nlen = cur - zero - 1;
93 nlen = cur - zero - 1;
94
94
95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 if (!node)
96 if (!node)
97 goto bail;
97 goto bail;
98
98
99 if (nlen > 40) {
99 if (nlen > 40) {
100 flags = PyBytes_FromStringAndSize(zero + 41,
100 flags = PyBytes_FromStringAndSize(zero + 41,
101 nlen - 40);
101 nlen - 40);
102 if (!flags)
102 if (!flags)
103 goto bail;
103 goto bail;
104
104
105 if (PyDict_SetItem(fdict, file, flags) == -1)
105 if (PyDict_SetItem(fdict, file, flags) == -1)
106 goto bail;
106 goto bail;
107 }
107 }
108
108
109 if (PyDict_SetItem(mfdict, file, node) == -1)
109 if (PyDict_SetItem(mfdict, file, node) == -1)
110 goto bail;
110 goto bail;
111
111
112 start = cur + 1;
112 start = cur + 1;
113 zero = NULL;
113 zero = NULL;
114
114
115 Py_XDECREF(flags);
115 Py_XDECREF(flags);
116 Py_XDECREF(node);
116 Py_XDECREF(node);
117 Py_XDECREF(file);
117 Py_XDECREF(file);
118 continue;
118 continue;
119 bail:
119 bail:
120 Py_XDECREF(flags);
120 Py_XDECREF(flags);
121 Py_XDECREF(node);
121 Py_XDECREF(node);
122 Py_XDECREF(file);
122 Py_XDECREF(file);
123 goto quit;
123 goto quit;
124 }
124 }
125
125
126 if (len > 0 && *(cur - 1) != '\n') {
126 if (len > 0 && *(cur - 1) != '\n') {
127 PyErr_SetString(PyExc_ValueError,
127 PyErr_SetString(PyExc_ValueError,
128 "manifest contains trailing garbage");
128 "manifest contains trailing garbage");
129 goto quit;
129 goto quit;
130 }
130 }
131
131
132 Py_INCREF(Py_None);
132 Py_INCREF(Py_None);
133 return Py_None;
133 return Py_None;
134 quit:
134 quit:
135 return NULL;
135 return NULL;
136 }
136 }
137
137
138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
139 {
139 {
140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
142 char *str, *cur, *end, *cpos;
142 char *str, *cur, *end, *cpos;
143 int state, mode, size, mtime;
143 int state, mode, size, mtime;
144 unsigned int flen;
144 unsigned int flen;
145 int len;
145 int len;
146
146
147 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
147 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
148 &PyDict_Type, &dmap,
148 &PyDict_Type, &dmap,
149 &PyDict_Type, &cmap,
149 &PyDict_Type, &cmap,
150 &str, &len))
150 &str, &len))
151 goto quit;
151 goto quit;
152
152
153 /* read parents */
153 /* read parents */
154 if (len < 40)
154 if (len < 40)
155 goto quit;
155 goto quit;
156
156
157 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
157 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
158 if (!parents)
158 if (!parents)
159 goto quit;
159 goto quit;
160
160
161 /* read filenames */
161 /* read filenames */
162 cur = str + 40;
162 cur = str + 40;
163 end = str + len;
163 end = str + len;
164
164
165 while (cur < end - 17) {
165 while (cur < end - 17) {
166 /* unpack header */
166 /* unpack header */
167 state = *cur;
167 state = *cur;
168 mode = getbe32(cur + 1);
168 mode = getbe32(cur + 1);
169 size = getbe32(cur + 5);
169 size = getbe32(cur + 5);
170 mtime = getbe32(cur + 9);
170 mtime = getbe32(cur + 9);
171 flen = getbe32(cur + 13);
171 flen = getbe32(cur + 13);
172 cur += 17;
172 cur += 17;
173 if (cur + flen > end || cur + flen < cur) {
173 if (cur + flen > end || cur + flen < cur) {
174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
175 goto quit;
175 goto quit;
176 }
176 }
177
177
178 entry = Py_BuildValue("ciii", state, mode, size, mtime);
178 entry = Py_BuildValue("ciii", state, mode, size, mtime);
179 if (!entry)
179 if (!entry)
180 goto quit;
180 goto quit;
181 PyObject_GC_UnTrack(entry); /* don't waste time with this */
181 PyObject_GC_UnTrack(entry); /* don't waste time with this */
182
182
183 cpos = memchr(cur, 0, flen);
183 cpos = memchr(cur, 0, flen);
184 if (cpos) {
184 if (cpos) {
185 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
185 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
186 cname = PyBytes_FromStringAndSize(cpos + 1,
186 cname = PyBytes_FromStringAndSize(cpos + 1,
187 flen - (cpos - cur) - 1);
187 flen - (cpos - cur) - 1);
188 if (!fname || !cname ||
188 if (!fname || !cname ||
189 PyDict_SetItem(cmap, fname, cname) == -1 ||
189 PyDict_SetItem(cmap, fname, cname) == -1 ||
190 PyDict_SetItem(dmap, fname, entry) == -1)
190 PyDict_SetItem(dmap, fname, entry) == -1)
191 goto quit;
191 goto quit;
192 Py_DECREF(cname);
192 Py_DECREF(cname);
193 } else {
193 } else {
194 fname = PyBytes_FromStringAndSize(cur, flen);
194 fname = PyBytes_FromStringAndSize(cur, flen);
195 if (!fname ||
195 if (!fname ||
196 PyDict_SetItem(dmap, fname, entry) == -1)
196 PyDict_SetItem(dmap, fname, entry) == -1)
197 goto quit;
197 goto quit;
198 }
198 }
199 cur += flen;
199 cur += flen;
200 Py_DECREF(fname);
200 Py_DECREF(fname);
201 Py_DECREF(entry);
201 Py_DECREF(entry);
202 fname = cname = entry = NULL;
202 fname = cname = entry = NULL;
203 }
203 }
204
204
205 ret = parents;
205 ret = parents;
206 Py_INCREF(ret);
206 Py_INCREF(ret);
207 quit:
207 quit:
208 Py_XDECREF(fname);
208 Py_XDECREF(fname);
209 Py_XDECREF(cname);
209 Py_XDECREF(cname);
210 Py_XDECREF(entry);
210 Py_XDECREF(entry);
211 Py_XDECREF(parents);
211 Py_XDECREF(parents);
212 return ret;
212 return ret;
213 }
213 }
214
214
215 /*
215 /*
216 * A base-16 trie for fast node->rev mapping.
216 * A base-16 trie for fast node->rev mapping.
217 *
217 *
218 * Positive value is index of the next node in the trie
218 * Positive value is index of the next node in the trie
219 * Negative value is a leaf: -(rev + 1)
219 * Negative value is a leaf: -(rev + 1)
220 * Zero is empty
220 * Zero is empty
221 */
221 */
222 typedef struct {
222 typedef struct {
223 int children[16];
223 int children[16];
224 } nodetree;
224 } nodetree;
225
225
226 /*
226 /*
227 * This class has two behaviours.
227 * This class has two behaviours.
228 *
228 *
229 * When used in a list-like way (with integer keys), we decode an
229 * When used in a list-like way (with integer keys), we decode an
230 * entry in a RevlogNG index file on demand. Our last entry is a
230 * entry in a RevlogNG index file on demand. Our last entry is a
231 * sentinel, always a nullid. We have limited support for
231 * sentinel, always a nullid. We have limited support for
232 * integer-keyed insert and delete, only at elements right before the
232 * integer-keyed insert and delete, only at elements right before the
233 * sentinel.
233 * sentinel.
234 *
234 *
235 * With string keys, we lazily perform a reverse mapping from node to
235 * With string keys, we lazily perform a reverse mapping from node to
236 * rev, using a base-16 trie.
236 * rev, using a base-16 trie.
237 */
237 */
238 typedef struct {
238 typedef struct {
239 PyObject_HEAD
239 PyObject_HEAD
240 /* Type-specific fields go here. */
240 /* Type-specific fields go here. */
241 PyObject *data; /* raw bytes of index */
241 PyObject *data; /* raw bytes of index */
242 PyObject **cache; /* cached tuples */
242 PyObject **cache; /* cached tuples */
243 const char **offsets; /* populated on demand */
243 const char **offsets; /* populated on demand */
244 Py_ssize_t raw_length; /* original number of elements */
244 Py_ssize_t raw_length; /* original number of elements */
245 Py_ssize_t length; /* current number of elements */
245 Py_ssize_t length; /* current number of elements */
246 PyObject *added; /* populated on demand */
246 PyObject *added; /* populated on demand */
247 nodetree *nt; /* base-16 trie */
247 nodetree *nt; /* base-16 trie */
248 int ntlength; /* # nodes in use */
248 int ntlength; /* # nodes in use */
249 int ntcapacity; /* # nodes allocated */
249 int ntcapacity; /* # nodes allocated */
250 int ntdepth; /* maximum depth of tree */
250 int ntdepth; /* maximum depth of tree */
251 int ntsplits; /* # splits performed */
251 int ntsplits; /* # splits performed */
252 int ntrev; /* last rev scanned */
252 int ntrev; /* last rev scanned */
253 int ntlookups; /* # lookups */
253 int ntlookups; /* # lookups */
254 int ntmisses; /* # lookups that miss the cache */
254 int ntmisses; /* # lookups that miss the cache */
255 int inlined;
255 int inlined;
256 } indexObject;
256 } indexObject;
257
257
258 static Py_ssize_t index_length(const indexObject *self)
258 static Py_ssize_t index_length(const indexObject *self)
259 {
259 {
260 if (self->added == NULL)
260 if (self->added == NULL)
261 return self->length;
261 return self->length;
262 return self->length + PyList_GET_SIZE(self->added);
262 return self->length + PyList_GET_SIZE(self->added);
263 }
263 }
264
264
265 static PyObject *nullentry;
265 static PyObject *nullentry;
266 static const char nullid[20];
266 static const char nullid[20];
267
267
268 static long inline_scan(indexObject *self, const char **offsets);
268 static long inline_scan(indexObject *self, const char **offsets);
269
269
270 #if LONG_MAX == 0x7fffffffL
270 #if LONG_MAX == 0x7fffffffL
271 static char *tuple_format = "Kiiiiiis#";
271 static char *tuple_format = "Kiiiiiis#";
272 #else
272 #else
273 static char *tuple_format = "kiiiiiis#";
273 static char *tuple_format = "kiiiiiis#";
274 #endif
274 #endif
275
275
276 /*
276 /*
277 * Return a pointer to the beginning of a RevlogNG record.
277 * Return a pointer to the beginning of a RevlogNG record.
278 */
278 */
279 static const char *index_deref(indexObject *self, Py_ssize_t pos)
279 static const char *index_deref(indexObject *self, Py_ssize_t pos)
280 {
280 {
281 if (self->inlined && pos > 0) {
281 if (self->inlined && pos > 0) {
282 if (self->offsets == NULL) {
282 if (self->offsets == NULL) {
283 self->offsets = malloc(self->raw_length *
283 self->offsets = malloc(self->raw_length *
284 sizeof(*self->offsets));
284 sizeof(*self->offsets));
285 if (self->offsets == NULL)
285 if (self->offsets == NULL)
286 return (const char *)PyErr_NoMemory();
286 return (const char *)PyErr_NoMemory();
287 inline_scan(self, self->offsets);
287 inline_scan(self, self->offsets);
288 }
288 }
289 return self->offsets[pos];
289 return self->offsets[pos];
290 }
290 }
291
291
292 return PyString_AS_STRING(self->data) + pos * 64;
292 return PyString_AS_STRING(self->data) + pos * 64;
293 }
293 }
294
294
295 /*
295 /*
296 * RevlogNG format (all in big endian, data may be inlined):
296 * RevlogNG format (all in big endian, data may be inlined):
297 * 6 bytes: offset
297 * 6 bytes: offset
298 * 2 bytes: flags
298 * 2 bytes: flags
299 * 4 bytes: compressed length
299 * 4 bytes: compressed length
300 * 4 bytes: uncompressed length
300 * 4 bytes: uncompressed length
301 * 4 bytes: base revision
301 * 4 bytes: base revision
302 * 4 bytes: link revision
302 * 4 bytes: link revision
303 * 4 bytes: parent 1 revision
303 * 4 bytes: parent 1 revision
304 * 4 bytes: parent 2 revision
304 * 4 bytes: parent 2 revision
305 * 32 bytes: nodeid (only 20 bytes used)
305 * 32 bytes: nodeid (only 20 bytes used)
306 */
306 */
307 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
307 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
308 {
308 {
309 uint64_t offset_flags;
309 uint64_t offset_flags;
310 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
310 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
311 const char *c_node_id;
311 const char *c_node_id;
312 const char *data;
312 const char *data;
313 Py_ssize_t length = index_length(self);
313 Py_ssize_t length = index_length(self);
314 PyObject *entry;
314 PyObject *entry;
315
315
316 if (pos < 0)
316 if (pos < 0)
317 pos += length;
317 pos += length;
318
318
319 if (pos < 0 || pos >= length) {
319 if (pos < 0 || pos >= length) {
320 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
320 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
321 return NULL;
321 return NULL;
322 }
322 }
323
323
324 if (pos == length - 1) {
324 if (pos == length - 1) {
325 Py_INCREF(nullentry);
325 Py_INCREF(nullentry);
326 return nullentry;
326 return nullentry;
327 }
327 }
328
328
329 if (pos >= self->length - 1) {
329 if (pos >= self->length - 1) {
330 PyObject *obj;
330 PyObject *obj;
331 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
331 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
332 Py_INCREF(obj);
332 Py_INCREF(obj);
333 return obj;
333 return obj;
334 }
334 }
335
335
336 if (self->cache) {
336 if (self->cache) {
337 if (self->cache[pos]) {
337 if (self->cache[pos]) {
338 Py_INCREF(self->cache[pos]);
338 Py_INCREF(self->cache[pos]);
339 return self->cache[pos];
339 return self->cache[pos];
340 }
340 }
341 } else {
341 } else {
342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
343 if (self->cache == NULL)
343 if (self->cache == NULL)
344 return PyErr_NoMemory();
344 return PyErr_NoMemory();
345 }
345 }
346
346
347 data = index_deref(self, pos);
347 data = index_deref(self, pos);
348 if (data == NULL)
348 if (data == NULL)
349 return NULL;
349 return NULL;
350
350
351 offset_flags = getbe32(data + 4);
351 offset_flags = getbe32(data + 4);
352 if (pos == 0) /* mask out version number for the first entry */
352 if (pos == 0) /* mask out version number for the first entry */
353 offset_flags &= 0xFFFF;
353 offset_flags &= 0xFFFF;
354 else {
354 else {
355 uint32_t offset_high = getbe32(data);
355 uint32_t offset_high = getbe32(data);
356 offset_flags |= ((uint64_t)offset_high) << 32;
356 offset_flags |= ((uint64_t)offset_high) << 32;
357 }
357 }
358
358
359 comp_len = getbe32(data + 8);
359 comp_len = getbe32(data + 8);
360 uncomp_len = getbe32(data + 12);
360 uncomp_len = getbe32(data + 12);
361 base_rev = getbe32(data + 16);
361 base_rev = getbe32(data + 16);
362 link_rev = getbe32(data + 20);
362 link_rev = getbe32(data + 20);
363 parent_1 = getbe32(data + 24);
363 parent_1 = getbe32(data + 24);
364 parent_2 = getbe32(data + 28);
364 parent_2 = getbe32(data + 28);
365 c_node_id = data + 32;
365 c_node_id = data + 32;
366
366
367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
368 uncomp_len, base_rev, link_rev,
368 uncomp_len, base_rev, link_rev,
369 parent_1, parent_2, c_node_id, 20);
369 parent_1, parent_2, c_node_id, 20);
370
370
371 if (entry)
371 if (entry)
372 PyObject_GC_UnTrack(entry);
372 PyObject_GC_UnTrack(entry);
373
373
374 self->cache[pos] = entry;
374 self->cache[pos] = entry;
375 Py_INCREF(entry);
375 Py_INCREF(entry);
376
376
377 return entry;
377 return entry;
378 }
378 }
379
379
380 /*
380 /*
381 * Return the 20-byte SHA of the node corresponding to the given rev.
381 * Return the 20-byte SHA of the node corresponding to the given rev.
382 */
382 */
383 static const char *index_node(indexObject *self, Py_ssize_t pos)
383 static const char *index_node(indexObject *self, Py_ssize_t pos)
384 {
384 {
385 Py_ssize_t length = index_length(self);
385 Py_ssize_t length = index_length(self);
386 const char *data;
386 const char *data;
387
387
388 if (pos == length - 1)
388 if (pos == length - 1)
389 return nullid;
389 return nullid;
390
390
391 if (pos >= length)
391 if (pos >= length)
392 return NULL;
392 return NULL;
393
393
394 if (pos >= self->length - 1) {
394 if (pos >= self->length - 1) {
395 PyObject *tuple, *str;
395 PyObject *tuple, *str;
396 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
396 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
397 str = PyTuple_GetItem(tuple, 7);
397 str = PyTuple_GetItem(tuple, 7);
398 return str ? PyString_AS_STRING(str) : NULL;
398 return str ? PyString_AS_STRING(str) : NULL;
399 }
399 }
400
400
401 data = index_deref(self, pos);
401 data = index_deref(self, pos);
402 return data ? data + 32 : NULL;
402 return data ? data + 32 : NULL;
403 }
403 }
404
404
405 static int nt_insert(indexObject *self, const char *node, int rev);
405 static int nt_insert(indexObject *self, const char *node, int rev);
406
406
407 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
407 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
408 {
408 {
409 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
409 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
410 return -1;
410 return -1;
411 if (*nodelen == 20)
411 if (*nodelen == 20)
412 return 0;
412 return 0;
413 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
413 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
414 return -1;
414 return -1;
415 }
415 }
416
416
417 static PyObject *index_insert(indexObject *self, PyObject *args)
417 static PyObject *index_insert(indexObject *self, PyObject *args)
418 {
418 {
419 PyObject *obj;
419 PyObject *obj;
420 char *node;
420 char *node;
421 long offset;
421 long offset;
422 Py_ssize_t len, nodelen;
422 Py_ssize_t len, nodelen;
423
423
424 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
424 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
425 return NULL;
425 return NULL;
426
426
427 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
427 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
428 PyErr_SetString(PyExc_TypeError, "8-tuple required");
428 PyErr_SetString(PyExc_TypeError, "8-tuple required");
429 return NULL;
429 return NULL;
430 }
430 }
431
431
432 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
432 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
433 return NULL;
433 return NULL;
434
434
435 len = index_length(self);
435 len = index_length(self);
436
436
437 if (offset < 0)
437 if (offset < 0)
438 offset += len;
438 offset += len;
439
439
440 if (offset != len - 1) {
440 if (offset != len - 1) {
441 PyErr_SetString(PyExc_IndexError,
441 PyErr_SetString(PyExc_IndexError,
442 "insert only supported at index -1");
442 "insert only supported at index -1");
443 return NULL;
443 return NULL;
444 }
444 }
445
445
446 if (offset > INT_MAX) {
446 if (offset > INT_MAX) {
447 PyErr_SetString(PyExc_ValueError,
447 PyErr_SetString(PyExc_ValueError,
448 "currently only 2**31 revs supported");
448 "currently only 2**31 revs supported");
449 return NULL;
449 return NULL;
450 }
450 }
451
451
452 if (self->added == NULL) {
452 if (self->added == NULL) {
453 self->added = PyList_New(0);
453 self->added = PyList_New(0);
454 if (self->added == NULL)
454 if (self->added == NULL)
455 return NULL;
455 return NULL;
456 }
456 }
457
457
458 if (PyList_Append(self->added, obj) == -1)
458 if (PyList_Append(self->added, obj) == -1)
459 return NULL;
459 return NULL;
460
460
461 if (self->nt)
461 if (self->nt)
462 nt_insert(self, node, (int)offset);
462 nt_insert(self, node, (int)offset);
463
463
464 Py_RETURN_NONE;
464 Py_RETURN_NONE;
465 }
465 }
466
466
467 static void _index_clearcaches(indexObject *self)
467 static void _index_clearcaches(indexObject *self)
468 {
468 {
469 if (self->cache) {
469 if (self->cache) {
470 Py_ssize_t i;
470 Py_ssize_t i;
471
471
472 for (i = 0; i < self->raw_length; i++) {
472 for (i = 0; i < self->raw_length; i++) {
473 if (self->cache[i]) {
473 if (self->cache[i]) {
474 Py_DECREF(self->cache[i]);
474 Py_DECREF(self->cache[i]);
475 self->cache[i] = NULL;
475 self->cache[i] = NULL;
476 }
476 }
477 }
477 }
478 free(self->cache);
478 free(self->cache);
479 self->cache = NULL;
479 self->cache = NULL;
480 }
480 }
481 if (self->offsets) {
481 if (self->offsets) {
482 free(self->offsets);
482 free(self->offsets);
483 self->offsets = NULL;
483 self->offsets = NULL;
484 }
484 }
485 if (self->nt) {
485 if (self->nt) {
486 free(self->nt);
486 free(self->nt);
487 self->nt = NULL;
487 self->nt = NULL;
488 }
488 }
489 }
489 }
490
490
491 static PyObject *index_clearcaches(indexObject *self)
491 static PyObject *index_clearcaches(indexObject *self)
492 {
492 {
493 _index_clearcaches(self);
493 _index_clearcaches(self);
494 self->ntlength = self->ntcapacity = 0;
494 self->ntlength = self->ntcapacity = 0;
495 self->ntdepth = self->ntsplits = 0;
495 self->ntdepth = self->ntsplits = 0;
496 self->ntrev = -1;
496 self->ntrev = -1;
497 self->ntlookups = self->ntmisses = 0;
497 self->ntlookups = self->ntmisses = 0;
498 Py_RETURN_NONE;
498 Py_RETURN_NONE;
499 }
499 }
500
500
501 static PyObject *index_stats(indexObject *self)
501 static PyObject *index_stats(indexObject *self)
502 {
502 {
503 PyObject *obj = PyDict_New();
503 PyObject *obj = PyDict_New();
504
504
505 if (obj == NULL)
505 if (obj == NULL)
506 return NULL;
506 return NULL;
507
507
508 #define istat(__n, __d) \
508 #define istat(__n, __d) \
509 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
509 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
510 goto bail;
510 goto bail;
511
511
512 if (self->added) {
512 if (self->added) {
513 Py_ssize_t len = PyList_GET_SIZE(self->added);
513 Py_ssize_t len = PyList_GET_SIZE(self->added);
514 if (PyDict_SetItemString(obj, "index entries added",
514 if (PyDict_SetItemString(obj, "index entries added",
515 PyInt_FromLong(len)) == -1)
515 PyInt_FromLong(len)) == -1)
516 goto bail;
516 goto bail;
517 }
517 }
518
518
519 if (self->raw_length != self->length - 1)
519 if (self->raw_length != self->length - 1)
520 istat(raw_length, "revs on disk");
520 istat(raw_length, "revs on disk");
521 istat(length, "revs in memory");
521 istat(length, "revs in memory");
522 istat(ntcapacity, "node trie capacity");
522 istat(ntcapacity, "node trie capacity");
523 istat(ntdepth, "node trie depth");
523 istat(ntdepth, "node trie depth");
524 istat(ntlength, "node trie count");
524 istat(ntlength, "node trie count");
525 istat(ntlookups, "node trie lookups");
525 istat(ntlookups, "node trie lookups");
526 istat(ntmisses, "node trie misses");
526 istat(ntmisses, "node trie misses");
527 istat(ntrev, "node trie last rev scanned");
527 istat(ntrev, "node trie last rev scanned");
528 istat(ntsplits, "node trie splits");
528 istat(ntsplits, "node trie splits");
529
529
530 #undef istat
530 #undef istat
531
531
532 return obj;
532 return obj;
533
533
534 bail:
534 bail:
535 Py_XDECREF(obj);
535 Py_XDECREF(obj);
536 return NULL;
536 return NULL;
537 }
537 }
538
538
539 static inline int nt_level(const char *node, int level)
539 static inline int nt_level(const char *node, int level)
540 {
540 {
541 int v = node[level>>1];
541 int v = node[level>>1];
542 if (!(level & 1))
542 if (!(level & 1))
543 v >>= 4;
543 v >>= 4;
544 return v & 0xf;
544 return v & 0xf;
545 }
545 }
546
546
547 /*
548 * Return values:
549 *
550 * -4: match is ambiguous (multiple candidates)
551 * -2: not found
552 * rest: valid rev
553 */
547 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
554 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
548 {
555 {
549 int level, off;
556 int level, off;
550
557
551 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
558 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
552 return -1;
559 return -1;
553
560
554 if (self->nt == NULL)
561 if (self->nt == NULL)
555 return -2;
562 return -2;
556
563
557 for (level = off = 0; level < nodelen; level++) {
564 for (level = off = 0; level < nodelen; level++) {
558 int k = nt_level(node, level);
565 int k = nt_level(node, level);
559 nodetree *n = &self->nt[off];
566 nodetree *n = &self->nt[off];
560 int v = n->children[k];
567 int v = n->children[k];
561
568
562 if (v < 0) {
569 if (v < 0) {
563 const char *n;
570 const char *n;
564 v = -v - 1;
571 v = -v - 1;
565 n = index_node(self, v);
572 n = index_node(self, v);
566 if (n == NULL)
573 if (n == NULL)
567 return -2;
574 return -2;
568 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
575 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
569 ? -2 : v;
576 ? -2 : v;
570 }
577 }
571 if (v == 0)
578 if (v == 0)
572 return -2;
579 return -2;
573 off = v;
580 off = v;
574 }
581 }
575 return -2;
582 /* multiple matches against an ambiguous prefix */
583 return -4;
576 }
584 }
577
585
578 static int nt_new(indexObject *self)
586 static int nt_new(indexObject *self)
579 {
587 {
580 if (self->ntlength == self->ntcapacity) {
588 if (self->ntlength == self->ntcapacity) {
581 self->ntcapacity *= 2;
589 self->ntcapacity *= 2;
582 self->nt = realloc(self->nt,
590 self->nt = realloc(self->nt,
583 self->ntcapacity * sizeof(nodetree));
591 self->ntcapacity * sizeof(nodetree));
584 if (self->nt == NULL) {
592 if (self->nt == NULL) {
585 PyErr_SetString(PyExc_MemoryError, "out of memory");
593 PyErr_SetString(PyExc_MemoryError, "out of memory");
586 return -1;
594 return -1;
587 }
595 }
588 memset(&self->nt[self->ntlength], 0,
596 memset(&self->nt[self->ntlength], 0,
589 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
597 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
590 }
598 }
591 return self->ntlength++;
599 return self->ntlength++;
592 }
600 }
593
601
594 static int nt_insert(indexObject *self, const char *node, int rev)
602 static int nt_insert(indexObject *self, const char *node, int rev)
595 {
603 {
596 int level = 0;
604 int level = 0;
597 int off = 0;
605 int off = 0;
598
606
599 while (level < 20) {
607 while (level < 20) {
600 int k = nt_level(node, level);
608 int k = nt_level(node, level);
601 nodetree *n;
609 nodetree *n;
602 int v;
610 int v;
603
611
604 n = &self->nt[off];
612 n = &self->nt[off];
605 v = n->children[k];
613 v = n->children[k];
606
614
607 if (v == 0) {
615 if (v == 0) {
608 n->children[k] = -rev - 1;
616 n->children[k] = -rev - 1;
609 return 0;
617 return 0;
610 }
618 }
611 if (v < 0) {
619 if (v < 0) {
612 const char *oldnode = index_node(self, -v - 1);
620 const char *oldnode = index_node(self, -v - 1);
613 int noff;
621 int noff;
614
622
615 if (!oldnode || !memcmp(oldnode, node, 20)) {
623 if (!oldnode || !memcmp(oldnode, node, 20)) {
616 n->children[k] = -rev - 1;
624 n->children[k] = -rev - 1;
617 return 0;
625 return 0;
618 }
626 }
619 noff = nt_new(self);
627 noff = nt_new(self);
620 if (noff == -1)
628 if (noff == -1)
621 return -1;
629 return -1;
622 /* self->nt may have been changed by realloc */
630 /* self->nt may have been changed by realloc */
623 self->nt[off].children[k] = noff;
631 self->nt[off].children[k] = noff;
624 off = noff;
632 off = noff;
625 n = &self->nt[off];
633 n = &self->nt[off];
626 n->children[nt_level(oldnode, ++level)] = v;
634 n->children[nt_level(oldnode, ++level)] = v;
627 if (level > self->ntdepth)
635 if (level > self->ntdepth)
628 self->ntdepth = level;
636 self->ntdepth = level;
629 self->ntsplits += 1;
637 self->ntsplits += 1;
630 } else {
638 } else {
631 level += 1;
639 level += 1;
632 off = v;
640 off = v;
633 }
641 }
634 }
642 }
635
643
636 return -1;
644 return -1;
637 }
645 }
638
646
639 static int nt_init(indexObject *self)
647 static int nt_init(indexObject *self)
640 {
648 {
641 if (self->nt == NULL) {
649 if (self->nt == NULL) {
642 self->ntcapacity = self->raw_length < 4
650 self->ntcapacity = self->raw_length < 4
643 ? 4 : self->raw_length / 2;
651 ? 4 : self->raw_length / 2;
644 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
652 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
645 if (self->nt == NULL) {
653 if (self->nt == NULL) {
646 PyErr_NoMemory();
654 PyErr_NoMemory();
647 return -1;
655 return -1;
648 }
656 }
649 self->ntlength = 1;
657 self->ntlength = 1;
650 self->ntrev = (int)index_length(self) - 1;
658 self->ntrev = (int)index_length(self) - 1;
651 self->ntlookups = 1;
659 self->ntlookups = 1;
652 self->ntmisses = 0;
660 self->ntmisses = 0;
653 }
661 }
654 return 0;
662 return 0;
655 }
663 }
656
664
657 /*
665 /*
658 * Return values:
666 * Return values:
659 *
667 *
660 * -3: error (exception set)
668 * -3: error (exception set)
661 * -2: not found (no exception set)
669 * -2: not found (no exception set)
662 * rest: valid rev
670 * rest: valid rev
663 */
671 */
664 static int index_find_node(indexObject *self,
672 static int index_find_node(indexObject *self,
665 const char *node, Py_ssize_t nodelen)
673 const char *node, Py_ssize_t nodelen)
666 {
674 {
667 int rev;
675 int rev;
668
676
669 self->ntlookups++;
677 self->ntlookups++;
670 rev = nt_find(self, node, nodelen);
678 rev = nt_find(self, node, nodelen);
671 if (rev >= -1)
679 if (rev >= -1)
672 return rev;
680 return rev;
673
681
674 if (nt_init(self) == -1)
682 if (nt_init(self) == -1)
675 return -3;
683 return -3;
676
684
677 /*
685 /*
678 * For the first handful of lookups, we scan the entire index,
686 * For the first handful of lookups, we scan the entire index,
679 * and cache only the matching nodes. This optimizes for cases
687 * and cache only the matching nodes. This optimizes for cases
680 * like "hg tip", where only a few nodes are accessed.
688 * like "hg tip", where only a few nodes are accessed.
681 *
689 *
682 * After that, we cache every node we visit, using a single
690 * After that, we cache every node we visit, using a single
683 * scan amortized over multiple lookups. This gives the best
691 * scan amortized over multiple lookups. This gives the best
684 * bulk performance, e.g. for "hg log".
692 * bulk performance, e.g. for "hg log".
685 */
693 */
686 if (self->ntmisses++ < 4) {
694 if (self->ntmisses++ < 4) {
687 for (rev = self->ntrev - 1; rev >= 0; rev--) {
695 for (rev = self->ntrev - 1; rev >= 0; rev--) {
688 const char *n = index_node(self, rev);
696 const char *n = index_node(self, rev);
689 if (n == NULL)
697 if (n == NULL)
690 return -2;
698 return -2;
691 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
699 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
692 if (nt_insert(self, n, rev) == -1)
700 if (nt_insert(self, n, rev) == -1)
693 return -3;
701 return -3;
694 break;
702 break;
695 }
703 }
696 }
704 }
697 } else {
705 } else {
698 for (rev = self->ntrev - 1; rev >= 0; rev--) {
706 for (rev = self->ntrev - 1; rev >= 0; rev--) {
699 const char *n = index_node(self, rev);
707 const char *n = index_node(self, rev);
700 if (n == NULL) {
708 if (n == NULL) {
701 self->ntrev = rev + 1;
709 self->ntrev = rev + 1;
702 return -2;
710 return -2;
703 }
711 }
704 if (nt_insert(self, n, rev) == -1) {
712 if (nt_insert(self, n, rev) == -1) {
705 self->ntrev = rev + 1;
713 self->ntrev = rev + 1;
706 return -3;
714 return -3;
707 }
715 }
708 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
716 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
709 break;
717 break;
710 }
718 }
711 }
719 }
712 self->ntrev = rev;
720 self->ntrev = rev;
713 }
721 }
714
722
715 if (rev >= 0)
723 if (rev >= 0)
716 return rev;
724 return rev;
717 return -2;
725 return -2;
718 }
726 }
719
727
720 static PyObject *raise_revlog_error(void)
728 static PyObject *raise_revlog_error(void)
721 {
729 {
722 static PyObject *errclass;
730 static PyObject *errclass;
723 PyObject *mod = NULL, *errobj;
731 PyObject *mod = NULL, *errobj;
724
732
725 if (errclass == NULL) {
733 if (errclass == NULL) {
726 PyObject *dict;
734 PyObject *dict;
727
735
728 mod = PyImport_ImportModule("mercurial.error");
736 mod = PyImport_ImportModule("mercurial.error");
729 if (mod == NULL)
737 if (mod == NULL)
730 goto classfail;
738 goto classfail;
731
739
732 dict = PyModule_GetDict(mod);
740 dict = PyModule_GetDict(mod);
733 if (dict == NULL)
741 if (dict == NULL)
734 goto classfail;
742 goto classfail;
735
743
736 errclass = PyDict_GetItemString(dict, "RevlogError");
744 errclass = PyDict_GetItemString(dict, "RevlogError");
737 if (errclass == NULL) {
745 if (errclass == NULL) {
738 PyErr_SetString(PyExc_SystemError,
746 PyErr_SetString(PyExc_SystemError,
739 "could not find RevlogError");
747 "could not find RevlogError");
740 goto classfail;
748 goto classfail;
741 }
749 }
742 Py_INCREF(errclass);
750 Py_INCREF(errclass);
743 }
751 }
744
752
745 errobj = PyObject_CallFunction(errclass, NULL);
753 errobj = PyObject_CallFunction(errclass, NULL);
746 if (errobj == NULL)
754 if (errobj == NULL)
747 return NULL;
755 return NULL;
748 PyErr_SetObject(errclass, errobj);
756 PyErr_SetObject(errclass, errobj);
749 return errobj;
757 return errobj;
750
758
751 classfail:
759 classfail:
752 Py_XDECREF(mod);
760 Py_XDECREF(mod);
753 return NULL;
761 return NULL;
754 }
762 }
755
763
756 static PyObject *index_getitem(indexObject *self, PyObject *value)
764 static PyObject *index_getitem(indexObject *self, PyObject *value)
757 {
765 {
758 char *node;
766 char *node;
759 Py_ssize_t nodelen;
767 Py_ssize_t nodelen;
760 int rev;
768 int rev;
761
769
762 if (PyInt_Check(value))
770 if (PyInt_Check(value))
763 return index_get(self, PyInt_AS_LONG(value));
771 return index_get(self, PyInt_AS_LONG(value));
764
772
765 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
773 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
766 return NULL;
774 return NULL;
767 rev = index_find_node(self, node, nodelen);
775 rev = index_find_node(self, node, nodelen);
768 if (rev >= -1)
776 if (rev >= -1)
769 return PyInt_FromLong(rev);
777 return PyInt_FromLong(rev);
770 if (rev == -2)
778 if (rev == -2)
771 raise_revlog_error();
779 raise_revlog_error();
772 return NULL;
780 return NULL;
773 }
781 }
774
782
775 static PyObject *index_m_get(indexObject *self, PyObject *args)
783 static PyObject *index_m_get(indexObject *self, PyObject *args)
776 {
784 {
777 char *node;
785 char *node;
778 int nodelen, rev;
786 int nodelen, rev;
779
787
780 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
788 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
781 return NULL;
789 return NULL;
782
790
783 rev = index_find_node(self, node, nodelen);
791 rev = index_find_node(self, node, nodelen);
784 if (rev == -3)
792 if (rev == -3)
785 return NULL;
793 return NULL;
786 if (rev == -2)
794 if (rev == -2)
787 Py_RETURN_NONE;
795 Py_RETURN_NONE;
788 return PyInt_FromLong(rev);
796 return PyInt_FromLong(rev);
789 }
797 }
790
798
791 static int index_contains(indexObject *self, PyObject *value)
799 static int index_contains(indexObject *self, PyObject *value)
792 {
800 {
793 char *node;
801 char *node;
794 Py_ssize_t nodelen;
802 Py_ssize_t nodelen;
795
803
796 if (PyInt_Check(value)) {
804 if (PyInt_Check(value)) {
797 long rev = PyInt_AS_LONG(value);
805 long rev = PyInt_AS_LONG(value);
798 return rev >= -1 && rev < index_length(self);
806 return rev >= -1 && rev < index_length(self);
799 }
807 }
800
808
801 if (!PyString_Check(value))
809 if (!PyString_Check(value))
802 return 0;
810 return 0;
803
811
804 node = PyString_AS_STRING(value);
812 node = PyString_AS_STRING(value);
805 nodelen = PyString_GET_SIZE(value);
813 nodelen = PyString_GET_SIZE(value);
806
814
807 switch (index_find_node(self, node, nodelen)) {
815 switch (index_find_node(self, node, nodelen)) {
808 case -3:
816 case -3:
809 return -1;
817 return -1;
810 case -2:
818 case -2:
811 return 0;
819 return 0;
812 default:
820 default:
813 return 1;
821 return 1;
814 }
822 }
815 }
823 }
816
824
817 /*
825 /*
818 * Invalidate any trie entries introduced by added revs.
826 * Invalidate any trie entries introduced by added revs.
819 */
827 */
820 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
828 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
821 {
829 {
822 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
830 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
823
831
824 for (i = start; i < len; i++) {
832 for (i = start; i < len; i++) {
825 PyObject *tuple = PyList_GET_ITEM(self->added, i);
833 PyObject *tuple = PyList_GET_ITEM(self->added, i);
826 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
834 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
827
835
828 nt_insert(self, PyString_AS_STRING(node), -1);
836 nt_insert(self, PyString_AS_STRING(node), -1);
829 }
837 }
830
838
831 if (start == 0) {
839 if (start == 0) {
832 Py_DECREF(self->added);
840 Py_DECREF(self->added);
833 self->added = NULL;
841 self->added = NULL;
834 }
842 }
835 }
843 }
836
844
837 /*
845 /*
838 * Delete a numeric range of revs, which must be at the end of the
846 * Delete a numeric range of revs, which must be at the end of the
839 * range, but exclude the sentinel nullid entry.
847 * range, but exclude the sentinel nullid entry.
840 */
848 */
841 static int index_slice_del(indexObject *self, PyObject *item)
849 static int index_slice_del(indexObject *self, PyObject *item)
842 {
850 {
843 Py_ssize_t start, stop, step, slicelength;
851 Py_ssize_t start, stop, step, slicelength;
844 Py_ssize_t length = index_length(self);
852 Py_ssize_t length = index_length(self);
845
853
846 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
854 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
847 &start, &stop, &step, &slicelength) < 0)
855 &start, &stop, &step, &slicelength) < 0)
848 return -1;
856 return -1;
849
857
850 if (slicelength <= 0)
858 if (slicelength <= 0)
851 return 0;
859 return 0;
852
860
853 if ((step < 0 && start < stop) || (step > 0 && start > stop))
861 if ((step < 0 && start < stop) || (step > 0 && start > stop))
854 stop = start;
862 stop = start;
855
863
856 if (step < 0) {
864 if (step < 0) {
857 stop = start + 1;
865 stop = start + 1;
858 start = stop + step*(slicelength - 1) - 1;
866 start = stop + step*(slicelength - 1) - 1;
859 step = -step;
867 step = -step;
860 }
868 }
861
869
862 if (step != 1) {
870 if (step != 1) {
863 PyErr_SetString(PyExc_ValueError,
871 PyErr_SetString(PyExc_ValueError,
864 "revlog index delete requires step size of 1");
872 "revlog index delete requires step size of 1");
865 return -1;
873 return -1;
866 }
874 }
867
875
868 if (stop != length - 1) {
876 if (stop != length - 1) {
869 PyErr_SetString(PyExc_IndexError,
877 PyErr_SetString(PyExc_IndexError,
870 "revlog index deletion indices are invalid");
878 "revlog index deletion indices are invalid");
871 return -1;
879 return -1;
872 }
880 }
873
881
874 if (start < self->length - 1) {
882 if (start < self->length - 1) {
875 if (self->nt) {
883 if (self->nt) {
876 Py_ssize_t i;
884 Py_ssize_t i;
877
885
878 for (i = start + 1; i < self->length - 1; i++) {
886 for (i = start + 1; i < self->length - 1; i++) {
879 const char *node = index_node(self, i);
887 const char *node = index_node(self, i);
880
888
881 if (node)
889 if (node)
882 nt_insert(self, node, -1);
890 nt_insert(self, node, -1);
883 }
891 }
884 if (self->added)
892 if (self->added)
885 nt_invalidate_added(self, 0);
893 nt_invalidate_added(self, 0);
886 if (self->ntrev > start)
894 if (self->ntrev > start)
887 self->ntrev = (int)start;
895 self->ntrev = (int)start;
888 }
896 }
889 self->length = start + 1;
897 self->length = start + 1;
890 return 0;
898 return 0;
891 }
899 }
892
900
893 if (self->nt) {
901 if (self->nt) {
894 nt_invalidate_added(self, start - self->length + 1);
902 nt_invalidate_added(self, start - self->length + 1);
895 if (self->ntrev > start)
903 if (self->ntrev > start)
896 self->ntrev = (int)start;
904 self->ntrev = (int)start;
897 }
905 }
898 return self->added
906 return self->added
899 ? PyList_SetSlice(self->added, start - self->length + 1,
907 ? PyList_SetSlice(self->added, start - self->length + 1,
900 PyList_GET_SIZE(self->added), NULL)
908 PyList_GET_SIZE(self->added), NULL)
901 : 0;
909 : 0;
902 }
910 }
903
911
904 /*
912 /*
905 * Supported ops:
913 * Supported ops:
906 *
914 *
907 * slice deletion
915 * slice deletion
908 * string assignment (extend node->rev mapping)
916 * string assignment (extend node->rev mapping)
909 * string deletion (shrink node->rev mapping)
917 * string deletion (shrink node->rev mapping)
910 */
918 */
911 static int index_assign_subscript(indexObject *self, PyObject *item,
919 static int index_assign_subscript(indexObject *self, PyObject *item,
912 PyObject *value)
920 PyObject *value)
913 {
921 {
914 char *node;
922 char *node;
915 Py_ssize_t nodelen;
923 Py_ssize_t nodelen;
916 long rev;
924 long rev;
917
925
918 if (PySlice_Check(item) && value == NULL)
926 if (PySlice_Check(item) && value == NULL)
919 return index_slice_del(self, item);
927 return index_slice_del(self, item);
920
928
921 if (node_check(item, &node, &nodelen) == -1)
929 if (node_check(item, &node, &nodelen) == -1)
922 return -1;
930 return -1;
923
931
924 if (value == NULL)
932 if (value == NULL)
925 return self->nt ? nt_insert(self, node, -1) : 0;
933 return self->nt ? nt_insert(self, node, -1) : 0;
926 rev = PyInt_AsLong(value);
934 rev = PyInt_AsLong(value);
927 if (rev > INT_MAX || rev < 0) {
935 if (rev > INT_MAX || rev < 0) {
928 if (!PyErr_Occurred())
936 if (!PyErr_Occurred())
929 PyErr_SetString(PyExc_ValueError, "rev out of range");
937 PyErr_SetString(PyExc_ValueError, "rev out of range");
930 return -1;
938 return -1;
931 }
939 }
932 return nt_insert(self, node, (int)rev);
940 return nt_insert(self, node, (int)rev);
933 }
941 }
934
942
935 /*
943 /*
936 * Find all RevlogNG entries in an index that has inline data. Update
944 * Find all RevlogNG entries in an index that has inline data. Update
937 * the optional "offsets" table with those entries.
945 * the optional "offsets" table with those entries.
938 */
946 */
939 static long inline_scan(indexObject *self, const char **offsets)
947 static long inline_scan(indexObject *self, const char **offsets)
940 {
948 {
941 const char *data = PyString_AS_STRING(self->data);
949 const char *data = PyString_AS_STRING(self->data);
942 const char *end = data + PyString_GET_SIZE(self->data);
950 const char *end = data + PyString_GET_SIZE(self->data);
943 const long hdrsize = 64;
951 const long hdrsize = 64;
944 long incr = hdrsize;
952 long incr = hdrsize;
945 Py_ssize_t len = 0;
953 Py_ssize_t len = 0;
946
954
947 while (data + hdrsize <= end) {
955 while (data + hdrsize <= end) {
948 uint32_t comp_len;
956 uint32_t comp_len;
949 const char *old_data;
957 const char *old_data;
950 /* 3rd element of header is length of compressed inline data */
958 /* 3rd element of header is length of compressed inline data */
951 comp_len = getbe32(data + 8);
959 comp_len = getbe32(data + 8);
952 incr = hdrsize + comp_len;
960 incr = hdrsize + comp_len;
953 if (incr < hdrsize)
961 if (incr < hdrsize)
954 break;
962 break;
955 if (offsets)
963 if (offsets)
956 offsets[len] = data;
964 offsets[len] = data;
957 len++;
965 len++;
958 old_data = data;
966 old_data = data;
959 data += incr;
967 data += incr;
960 if (data <= old_data)
968 if (data <= old_data)
961 break;
969 break;
962 }
970 }
963
971
964 if (data != end && data + hdrsize != end) {
972 if (data != end && data + hdrsize != end) {
965 if (!PyErr_Occurred())
973 if (!PyErr_Occurred())
966 PyErr_SetString(PyExc_ValueError, "corrupt index file");
974 PyErr_SetString(PyExc_ValueError, "corrupt index file");
967 return -1;
975 return -1;
968 }
976 }
969
977
970 return len;
978 return len;
971 }
979 }
972
980
973 static int index_init(indexObject *self, PyObject *args)
981 static int index_init(indexObject *self, PyObject *args)
974 {
982 {
975 PyObject *data_obj, *inlined_obj;
983 PyObject *data_obj, *inlined_obj;
976 Py_ssize_t size;
984 Py_ssize_t size;
977
985
978 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
986 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
979 return -1;
987 return -1;
980 if (!PyString_Check(data_obj)) {
988 if (!PyString_Check(data_obj)) {
981 PyErr_SetString(PyExc_TypeError, "data is not a string");
989 PyErr_SetString(PyExc_TypeError, "data is not a string");
982 return -1;
990 return -1;
983 }
991 }
984 size = PyString_GET_SIZE(data_obj);
992 size = PyString_GET_SIZE(data_obj);
985
993
986 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
994 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
987 self->data = data_obj;
995 self->data = data_obj;
988 self->cache = NULL;
996 self->cache = NULL;
989
997
990 self->added = NULL;
998 self->added = NULL;
991 self->offsets = NULL;
999 self->offsets = NULL;
992 self->nt = NULL;
1000 self->nt = NULL;
993 self->ntlength = self->ntcapacity = 0;
1001 self->ntlength = self->ntcapacity = 0;
994 self->ntdepth = self->ntsplits = 0;
1002 self->ntdepth = self->ntsplits = 0;
995 self->ntlookups = self->ntmisses = 0;
1003 self->ntlookups = self->ntmisses = 0;
996 self->ntrev = -1;
1004 self->ntrev = -1;
997 Py_INCREF(self->data);
1005 Py_INCREF(self->data);
998
1006
999 if (self->inlined) {
1007 if (self->inlined) {
1000 long len = inline_scan(self, NULL);
1008 long len = inline_scan(self, NULL);
1001 if (len == -1)
1009 if (len == -1)
1002 goto bail;
1010 goto bail;
1003 self->raw_length = len;
1011 self->raw_length = len;
1004 self->length = len + 1;
1012 self->length = len + 1;
1005 } else {
1013 } else {
1006 if (size % 64) {
1014 if (size % 64) {
1007 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1015 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1008 goto bail;
1016 goto bail;
1009 }
1017 }
1010 self->raw_length = size / 64;
1018 self->raw_length = size / 64;
1011 self->length = self->raw_length + 1;
1019 self->length = self->raw_length + 1;
1012 }
1020 }
1013
1021
1014 return 0;
1022 return 0;
1015 bail:
1023 bail:
1016 return -1;
1024 return -1;
1017 }
1025 }
1018
1026
1019 static PyObject *index_nodemap(indexObject *self)
1027 static PyObject *index_nodemap(indexObject *self)
1020 {
1028 {
1021 Py_INCREF(self);
1029 Py_INCREF(self);
1022 return (PyObject *)self;
1030 return (PyObject *)self;
1023 }
1031 }
1024
1032
1025 static void index_dealloc(indexObject *self)
1033 static void index_dealloc(indexObject *self)
1026 {
1034 {
1027 _index_clearcaches(self);
1035 _index_clearcaches(self);
1028 Py_DECREF(self->data);
1036 Py_DECREF(self->data);
1029 Py_XDECREF(self->added);
1037 Py_XDECREF(self->added);
1030 PyObject_Del(self);
1038 PyObject_Del(self);
1031 }
1039 }
1032
1040
1033 static PySequenceMethods index_sequence_methods = {
1041 static PySequenceMethods index_sequence_methods = {
1034 (lenfunc)index_length, /* sq_length */
1042 (lenfunc)index_length, /* sq_length */
1035 0, /* sq_concat */
1043 0, /* sq_concat */
1036 0, /* sq_repeat */
1044 0, /* sq_repeat */
1037 (ssizeargfunc)index_get, /* sq_item */
1045 (ssizeargfunc)index_get, /* sq_item */
1038 0, /* sq_slice */
1046 0, /* sq_slice */
1039 0, /* sq_ass_item */
1047 0, /* sq_ass_item */
1040 0, /* sq_ass_slice */
1048 0, /* sq_ass_slice */
1041 (objobjproc)index_contains, /* sq_contains */
1049 (objobjproc)index_contains, /* sq_contains */
1042 };
1050 };
1043
1051
1044 static PyMappingMethods index_mapping_methods = {
1052 static PyMappingMethods index_mapping_methods = {
1045 (lenfunc)index_length, /* mp_length */
1053 (lenfunc)index_length, /* mp_length */
1046 (binaryfunc)index_getitem, /* mp_subscript */
1054 (binaryfunc)index_getitem, /* mp_subscript */
1047 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1055 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1048 };
1056 };
1049
1057
1050 static PyMethodDef index_methods[] = {
1058 static PyMethodDef index_methods[] = {
1051 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1059 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1052 "clear the index caches"},
1060 "clear the index caches"},
1053 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1061 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1054 "get an index entry"},
1062 "get an index entry"},
1055 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1063 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1056 "insert an index entry"},
1064 "insert an index entry"},
1057 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1065 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1058 "stats for the index"},
1066 "stats for the index"},
1059 {NULL} /* Sentinel */
1067 {NULL} /* Sentinel */
1060 };
1068 };
1061
1069
1062 static PyGetSetDef index_getset[] = {
1070 static PyGetSetDef index_getset[] = {
1063 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1071 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1064 {NULL} /* Sentinel */
1072 {NULL} /* Sentinel */
1065 };
1073 };
1066
1074
1067 static PyTypeObject indexType = {
1075 static PyTypeObject indexType = {
1068 PyObject_HEAD_INIT(NULL)
1076 PyObject_HEAD_INIT(NULL)
1069 0, /* ob_size */
1077 0, /* ob_size */
1070 "parsers.index", /* tp_name */
1078 "parsers.index", /* tp_name */
1071 sizeof(indexObject), /* tp_basicsize */
1079 sizeof(indexObject), /* tp_basicsize */
1072 0, /* tp_itemsize */
1080 0, /* tp_itemsize */
1073 (destructor)index_dealloc, /* tp_dealloc */
1081 (destructor)index_dealloc, /* tp_dealloc */
1074 0, /* tp_print */
1082 0, /* tp_print */
1075 0, /* tp_getattr */
1083 0, /* tp_getattr */
1076 0, /* tp_setattr */
1084 0, /* tp_setattr */
1077 0, /* tp_compare */
1085 0, /* tp_compare */
1078 0, /* tp_repr */
1086 0, /* tp_repr */
1079 0, /* tp_as_number */
1087 0, /* tp_as_number */
1080 &index_sequence_methods, /* tp_as_sequence */
1088 &index_sequence_methods, /* tp_as_sequence */
1081 &index_mapping_methods, /* tp_as_mapping */
1089 &index_mapping_methods, /* tp_as_mapping */
1082 0, /* tp_hash */
1090 0, /* tp_hash */
1083 0, /* tp_call */
1091 0, /* tp_call */
1084 0, /* tp_str */
1092 0, /* tp_str */
1085 0, /* tp_getattro */
1093 0, /* tp_getattro */
1086 0, /* tp_setattro */
1094 0, /* tp_setattro */
1087 0, /* tp_as_buffer */
1095 0, /* tp_as_buffer */
1088 Py_TPFLAGS_DEFAULT, /* tp_flags */
1096 Py_TPFLAGS_DEFAULT, /* tp_flags */
1089 "revlog index", /* tp_doc */
1097 "revlog index", /* tp_doc */
1090 0, /* tp_traverse */
1098 0, /* tp_traverse */
1091 0, /* tp_clear */
1099 0, /* tp_clear */
1092 0, /* tp_richcompare */
1100 0, /* tp_richcompare */
1093 0, /* tp_weaklistoffset */
1101 0, /* tp_weaklistoffset */
1094 0, /* tp_iter */
1102 0, /* tp_iter */
1095 0, /* tp_iternext */
1103 0, /* tp_iternext */
1096 index_methods, /* tp_methods */
1104 index_methods, /* tp_methods */
1097 0, /* tp_members */
1105 0, /* tp_members */
1098 index_getset, /* tp_getset */
1106 index_getset, /* tp_getset */
1099 0, /* tp_base */
1107 0, /* tp_base */
1100 0, /* tp_dict */
1108 0, /* tp_dict */
1101 0, /* tp_descr_get */
1109 0, /* tp_descr_get */
1102 0, /* tp_descr_set */
1110 0, /* tp_descr_set */
1103 0, /* tp_dictoffset */
1111 0, /* tp_dictoffset */
1104 (initproc)index_init, /* tp_init */
1112 (initproc)index_init, /* tp_init */
1105 0, /* tp_alloc */
1113 0, /* tp_alloc */
1106 PyType_GenericNew, /* tp_new */
1114 PyType_GenericNew, /* tp_new */
1107 };
1115 };
1108
1116
1109 /*
1117 /*
1110 * returns a tuple of the form (index, index, cache) with elements as
1118 * returns a tuple of the form (index, index, cache) with elements as
1111 * follows:
1119 * follows:
1112 *
1120 *
1113 * index: an index object that lazily parses RevlogNG records
1121 * index: an index object that lazily parses RevlogNG records
1114 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1122 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1115 *
1123 *
1116 * added complications are for backwards compatibility
1124 * added complications are for backwards compatibility
1117 */
1125 */
1118 static PyObject *parse_index2(PyObject *self, PyObject *args)
1126 static PyObject *parse_index2(PyObject *self, PyObject *args)
1119 {
1127 {
1120 PyObject *tuple = NULL, *cache = NULL;
1128 PyObject *tuple = NULL, *cache = NULL;
1121 indexObject *idx;
1129 indexObject *idx;
1122 int ret;
1130 int ret;
1123
1131
1124 idx = PyObject_New(indexObject, &indexType);
1132 idx = PyObject_New(indexObject, &indexType);
1125 if (idx == NULL)
1133 if (idx == NULL)
1126 goto bail;
1134 goto bail;
1127
1135
1128 ret = index_init(idx, args);
1136 ret = index_init(idx, args);
1129 if (ret == -1)
1137 if (ret == -1)
1130 goto bail;
1138 goto bail;
1131
1139
1132 if (idx->inlined) {
1140 if (idx->inlined) {
1133 cache = Py_BuildValue("iO", 0, idx->data);
1141 cache = Py_BuildValue("iO", 0, idx->data);
1134 if (cache == NULL)
1142 if (cache == NULL)
1135 goto bail;
1143 goto bail;
1136 } else {
1144 } else {
1137 cache = Py_None;
1145 cache = Py_None;
1138 Py_INCREF(cache);
1146 Py_INCREF(cache);
1139 }
1147 }
1140
1148
1141 tuple = Py_BuildValue("NN", idx, cache);
1149 tuple = Py_BuildValue("NN", idx, cache);
1142 if (!tuple)
1150 if (!tuple)
1143 goto bail;
1151 goto bail;
1144 return tuple;
1152 return tuple;
1145
1153
1146 bail:
1154 bail:
1147 Py_XDECREF(idx);
1155 Py_XDECREF(idx);
1148 Py_XDECREF(cache);
1156 Py_XDECREF(cache);
1149 Py_XDECREF(tuple);
1157 Py_XDECREF(tuple);
1150 return NULL;
1158 return NULL;
1151 }
1159 }
1152
1160
1153 static char parsers_doc[] = "Efficient content parsing.";
1161 static char parsers_doc[] = "Efficient content parsing.";
1154
1162
1155 static PyMethodDef methods[] = {
1163 static PyMethodDef methods[] = {
1156 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1164 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1157 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1165 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1158 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1166 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1159 {NULL, NULL}
1167 {NULL, NULL}
1160 };
1168 };
1161
1169
1162 static void module_init(PyObject *mod)
1170 static void module_init(PyObject *mod)
1163 {
1171 {
1164 if (PyType_Ready(&indexType) < 0)
1172 if (PyType_Ready(&indexType) < 0)
1165 return;
1173 return;
1166 Py_INCREF(&indexType);
1174 Py_INCREF(&indexType);
1167
1175
1168 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1176 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1169
1177
1170 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1178 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1171 -1, -1, -1, -1, nullid, 20);
1179 -1, -1, -1, -1, nullid, 20);
1172 if (nullentry)
1180 if (nullentry)
1173 PyObject_GC_UnTrack(nullentry);
1181 PyObject_GC_UnTrack(nullentry);
1174 }
1182 }
1175
1183
1176 #ifdef IS_PY3K
1184 #ifdef IS_PY3K
1177 static struct PyModuleDef parsers_module = {
1185 static struct PyModuleDef parsers_module = {
1178 PyModuleDef_HEAD_INIT,
1186 PyModuleDef_HEAD_INIT,
1179 "parsers",
1187 "parsers",
1180 parsers_doc,
1188 parsers_doc,
1181 -1,
1189 -1,
1182 methods
1190 methods
1183 };
1191 };
1184
1192
1185 PyMODINIT_FUNC PyInit_parsers(void)
1193 PyMODINIT_FUNC PyInit_parsers(void)
1186 {
1194 {
1187 PyObject *mod = PyModule_Create(&parsers_module);
1195 PyObject *mod = PyModule_Create(&parsers_module);
1188 module_init(mod);
1196 module_init(mod);
1189 return mod;
1197 return mod;
1190 }
1198 }
1191 #else
1199 #else
1192 PyMODINIT_FUNC initparsers(void)
1200 PyMODINIT_FUNC initparsers(void)
1193 {
1201 {
1194 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1202 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1195 module_init(mod);
1203 module_init(mod);
1196 }
1204 }
1197 #endif
1205 #endif
General Comments 0
You need to be logged in to leave comments. Login now