##// END OF EJS Templates
parsers: change the type signature of hexdigit...
Bryan O'Sullivan -
r16617:4fb16743 default
parent child Browse files
Show More
@@ -1,1205 +1,1207 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
16 static int hexdigit(char c)
16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 {
17 {
18 char c = p[off];
19
18 if (c >= '0' && c <= '9')
20 if (c >= '0' && c <= '9')
19 return c - '0';
21 return c - '0';
20 if (c >= 'a' && c <= 'f')
22 if (c >= 'a' && c <= 'f')
21 return c - 'a' + 10;
23 return c - 'a' + 10;
22 if (c >= 'A' && c <= 'F')
24 if (c >= 'A' && c <= 'F')
23 return c - 'A' + 10;
25 return c - 'A' + 10;
24
26
25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 return 0;
28 return 0;
27 }
29 }
28
30
29 /*
31 /*
30 * Turn a hex-encoded string into binary.
32 * Turn a hex-encoded string into binary.
31 */
33 */
32 static PyObject *unhexlify(const char *str, int len)
34 static PyObject *unhexlify(const char *str, int len)
33 {
35 {
34 PyObject *ret;
36 PyObject *ret;
35 const char *c;
36 char *d;
37 char *d;
38 int i;
37
39
38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39
41
40 if (!ret)
42 if (!ret)
41 return NULL;
43 return NULL;
42
44
43 d = PyBytes_AsString(ret);
45 d = PyBytes_AsString(ret);
44
46
45 for (c = str; c < str + len;) {
47 for (i = 0; i < len;) {
46 int hi = hexdigit(*c++);
48 int hi = hexdigit(str, i++);
47 int lo = hexdigit(*c++);
49 int lo = hexdigit(str, i++);
48 *d++ = (hi << 4) | lo;
50 *d++ = (hi << 4) | lo;
49 }
51 }
50
52
51 return ret;
53 return ret;
52 }
54 }
53
55
54 /*
56 /*
55 * This code assumes that a manifest is stitched together with newline
57 * This code assumes that a manifest is stitched together with newline
56 * ('\n') characters.
58 * ('\n') characters.
57 */
59 */
58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 {
61 {
60 PyObject *mfdict, *fdict;
62 PyObject *mfdict, *fdict;
61 char *str, *cur, *start, *zero;
63 char *str, *cur, *start, *zero;
62 int len;
64 int len;
63
65
64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 &PyDict_Type, &mfdict,
67 &PyDict_Type, &mfdict,
66 &PyDict_Type, &fdict,
68 &PyDict_Type, &fdict,
67 &str, &len))
69 &str, &len))
68 goto quit;
70 goto quit;
69
71
70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 PyObject *file = NULL, *node = NULL;
73 PyObject *file = NULL, *node = NULL;
72 PyObject *flags = NULL;
74 PyObject *flags = NULL;
73 int nlen;
75 int nlen;
74
76
75 if (!*cur) {
77 if (!*cur) {
76 zero = cur;
78 zero = cur;
77 continue;
79 continue;
78 }
80 }
79 else if (*cur != '\n')
81 else if (*cur != '\n')
80 continue;
82 continue;
81
83
82 if (!zero) {
84 if (!zero) {
83 PyErr_SetString(PyExc_ValueError,
85 PyErr_SetString(PyExc_ValueError,
84 "manifest entry has no separator");
86 "manifest entry has no separator");
85 goto quit;
87 goto quit;
86 }
88 }
87
89
88 file = PyBytes_FromStringAndSize(start, zero - start);
90 file = PyBytes_FromStringAndSize(start, zero - start);
89
91
90 if (!file)
92 if (!file)
91 goto bail;
93 goto bail;
92
94
93 nlen = cur - zero - 1;
95 nlen = cur - zero - 1;
94
96
95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 if (!node)
98 if (!node)
97 goto bail;
99 goto bail;
98
100
99 if (nlen > 40) {
101 if (nlen > 40) {
100 flags = PyBytes_FromStringAndSize(zero + 41,
102 flags = PyBytes_FromStringAndSize(zero + 41,
101 nlen - 40);
103 nlen - 40);
102 if (!flags)
104 if (!flags)
103 goto bail;
105 goto bail;
104
106
105 if (PyDict_SetItem(fdict, file, flags) == -1)
107 if (PyDict_SetItem(fdict, file, flags) == -1)
106 goto bail;
108 goto bail;
107 }
109 }
108
110
109 if (PyDict_SetItem(mfdict, file, node) == -1)
111 if (PyDict_SetItem(mfdict, file, node) == -1)
110 goto bail;
112 goto bail;
111
113
112 start = cur + 1;
114 start = cur + 1;
113 zero = NULL;
115 zero = NULL;
114
116
115 Py_XDECREF(flags);
117 Py_XDECREF(flags);
116 Py_XDECREF(node);
118 Py_XDECREF(node);
117 Py_XDECREF(file);
119 Py_XDECREF(file);
118 continue;
120 continue;
119 bail:
121 bail:
120 Py_XDECREF(flags);
122 Py_XDECREF(flags);
121 Py_XDECREF(node);
123 Py_XDECREF(node);
122 Py_XDECREF(file);
124 Py_XDECREF(file);
123 goto quit;
125 goto quit;
124 }
126 }
125
127
126 if (len > 0 && *(cur - 1) != '\n') {
128 if (len > 0 && *(cur - 1) != '\n') {
127 PyErr_SetString(PyExc_ValueError,
129 PyErr_SetString(PyExc_ValueError,
128 "manifest contains trailing garbage");
130 "manifest contains trailing garbage");
129 goto quit;
131 goto quit;
130 }
132 }
131
133
132 Py_INCREF(Py_None);
134 Py_INCREF(Py_None);
133 return Py_None;
135 return Py_None;
134 quit:
136 quit:
135 return NULL;
137 return NULL;
136 }
138 }
137
139
138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
139 {
141 {
140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
142 char *str, *cur, *end, *cpos;
144 char *str, *cur, *end, *cpos;
143 int state, mode, size, mtime;
145 int state, mode, size, mtime;
144 unsigned int flen;
146 unsigned int flen;
145 int len;
147 int len;
146
148
147 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
148 &PyDict_Type, &dmap,
150 &PyDict_Type, &dmap,
149 &PyDict_Type, &cmap,
151 &PyDict_Type, &cmap,
150 &str, &len))
152 &str, &len))
151 goto quit;
153 goto quit;
152
154
153 /* read parents */
155 /* read parents */
154 if (len < 40)
156 if (len < 40)
155 goto quit;
157 goto quit;
156
158
157 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
158 if (!parents)
160 if (!parents)
159 goto quit;
161 goto quit;
160
162
161 /* read filenames */
163 /* read filenames */
162 cur = str + 40;
164 cur = str + 40;
163 end = str + len;
165 end = str + len;
164
166
165 while (cur < end - 17) {
167 while (cur < end - 17) {
166 /* unpack header */
168 /* unpack header */
167 state = *cur;
169 state = *cur;
168 mode = getbe32(cur + 1);
170 mode = getbe32(cur + 1);
169 size = getbe32(cur + 5);
171 size = getbe32(cur + 5);
170 mtime = getbe32(cur + 9);
172 mtime = getbe32(cur + 9);
171 flen = getbe32(cur + 13);
173 flen = getbe32(cur + 13);
172 cur += 17;
174 cur += 17;
173 if (cur + flen > end || cur + flen < cur) {
175 if (cur + flen > end || cur + flen < cur) {
174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
175 goto quit;
177 goto quit;
176 }
178 }
177
179
178 entry = Py_BuildValue("ciii", state, mode, size, mtime);
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
179 if (!entry)
181 if (!entry)
180 goto quit;
182 goto quit;
181 PyObject_GC_UnTrack(entry); /* don't waste time with this */
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
182
184
183 cpos = memchr(cur, 0, flen);
185 cpos = memchr(cur, 0, flen);
184 if (cpos) {
186 if (cpos) {
185 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
186 cname = PyBytes_FromStringAndSize(cpos + 1,
188 cname = PyBytes_FromStringAndSize(cpos + 1,
187 flen - (cpos - cur) - 1);
189 flen - (cpos - cur) - 1);
188 if (!fname || !cname ||
190 if (!fname || !cname ||
189 PyDict_SetItem(cmap, fname, cname) == -1 ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
190 PyDict_SetItem(dmap, fname, entry) == -1)
192 PyDict_SetItem(dmap, fname, entry) == -1)
191 goto quit;
193 goto quit;
192 Py_DECREF(cname);
194 Py_DECREF(cname);
193 } else {
195 } else {
194 fname = PyBytes_FromStringAndSize(cur, flen);
196 fname = PyBytes_FromStringAndSize(cur, flen);
195 if (!fname ||
197 if (!fname ||
196 PyDict_SetItem(dmap, fname, entry) == -1)
198 PyDict_SetItem(dmap, fname, entry) == -1)
197 goto quit;
199 goto quit;
198 }
200 }
199 cur += flen;
201 cur += flen;
200 Py_DECREF(fname);
202 Py_DECREF(fname);
201 Py_DECREF(entry);
203 Py_DECREF(entry);
202 fname = cname = entry = NULL;
204 fname = cname = entry = NULL;
203 }
205 }
204
206
205 ret = parents;
207 ret = parents;
206 Py_INCREF(ret);
208 Py_INCREF(ret);
207 quit:
209 quit:
208 Py_XDECREF(fname);
210 Py_XDECREF(fname);
209 Py_XDECREF(cname);
211 Py_XDECREF(cname);
210 Py_XDECREF(entry);
212 Py_XDECREF(entry);
211 Py_XDECREF(parents);
213 Py_XDECREF(parents);
212 return ret;
214 return ret;
213 }
215 }
214
216
215 /*
217 /*
216 * A base-16 trie for fast node->rev mapping.
218 * A base-16 trie for fast node->rev mapping.
217 *
219 *
218 * Positive value is index of the next node in the trie
220 * Positive value is index of the next node in the trie
219 * Negative value is a leaf: -(rev + 1)
221 * Negative value is a leaf: -(rev + 1)
220 * Zero is empty
222 * Zero is empty
221 */
223 */
222 typedef struct {
224 typedef struct {
223 int children[16];
225 int children[16];
224 } nodetree;
226 } nodetree;
225
227
226 /*
228 /*
227 * This class has two behaviours.
229 * This class has two behaviours.
228 *
230 *
229 * When used in a list-like way (with integer keys), we decode an
231 * When used in a list-like way (with integer keys), we decode an
230 * entry in a RevlogNG index file on demand. Our last entry is a
232 * entry in a RevlogNG index file on demand. Our last entry is a
231 * sentinel, always a nullid. We have limited support for
233 * sentinel, always a nullid. We have limited support for
232 * integer-keyed insert and delete, only at elements right before the
234 * integer-keyed insert and delete, only at elements right before the
233 * sentinel.
235 * sentinel.
234 *
236 *
235 * With string keys, we lazily perform a reverse mapping from node to
237 * With string keys, we lazily perform a reverse mapping from node to
236 * rev, using a base-16 trie.
238 * rev, using a base-16 trie.
237 */
239 */
238 typedef struct {
240 typedef struct {
239 PyObject_HEAD
241 PyObject_HEAD
240 /* Type-specific fields go here. */
242 /* Type-specific fields go here. */
241 PyObject *data; /* raw bytes of index */
243 PyObject *data; /* raw bytes of index */
242 PyObject **cache; /* cached tuples */
244 PyObject **cache; /* cached tuples */
243 const char **offsets; /* populated on demand */
245 const char **offsets; /* populated on demand */
244 Py_ssize_t raw_length; /* original number of elements */
246 Py_ssize_t raw_length; /* original number of elements */
245 Py_ssize_t length; /* current number of elements */
247 Py_ssize_t length; /* current number of elements */
246 PyObject *added; /* populated on demand */
248 PyObject *added; /* populated on demand */
247 nodetree *nt; /* base-16 trie */
249 nodetree *nt; /* base-16 trie */
248 int ntlength; /* # nodes in use */
250 int ntlength; /* # nodes in use */
249 int ntcapacity; /* # nodes allocated */
251 int ntcapacity; /* # nodes allocated */
250 int ntdepth; /* maximum depth of tree */
252 int ntdepth; /* maximum depth of tree */
251 int ntsplits; /* # splits performed */
253 int ntsplits; /* # splits performed */
252 int ntrev; /* last rev scanned */
254 int ntrev; /* last rev scanned */
253 int ntlookups; /* # lookups */
255 int ntlookups; /* # lookups */
254 int ntmisses; /* # lookups that miss the cache */
256 int ntmisses; /* # lookups that miss the cache */
255 int inlined;
257 int inlined;
256 } indexObject;
258 } indexObject;
257
259
258 static Py_ssize_t index_length(const indexObject *self)
260 static Py_ssize_t index_length(const indexObject *self)
259 {
261 {
260 if (self->added == NULL)
262 if (self->added == NULL)
261 return self->length;
263 return self->length;
262 return self->length + PyList_GET_SIZE(self->added);
264 return self->length + PyList_GET_SIZE(self->added);
263 }
265 }
264
266
265 static PyObject *nullentry;
267 static PyObject *nullentry;
266 static const char nullid[20];
268 static const char nullid[20];
267
269
268 static long inline_scan(indexObject *self, const char **offsets);
270 static long inline_scan(indexObject *self, const char **offsets);
269
271
270 #if LONG_MAX == 0x7fffffffL
272 #if LONG_MAX == 0x7fffffffL
271 static char *tuple_format = "Kiiiiiis#";
273 static char *tuple_format = "Kiiiiiis#";
272 #else
274 #else
273 static char *tuple_format = "kiiiiiis#";
275 static char *tuple_format = "kiiiiiis#";
274 #endif
276 #endif
275
277
276 /*
278 /*
277 * Return a pointer to the beginning of a RevlogNG record.
279 * Return a pointer to the beginning of a RevlogNG record.
278 */
280 */
279 static const char *index_deref(indexObject *self, Py_ssize_t pos)
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
280 {
282 {
281 if (self->inlined && pos > 0) {
283 if (self->inlined && pos > 0) {
282 if (self->offsets == NULL) {
284 if (self->offsets == NULL) {
283 self->offsets = malloc(self->raw_length *
285 self->offsets = malloc(self->raw_length *
284 sizeof(*self->offsets));
286 sizeof(*self->offsets));
285 if (self->offsets == NULL)
287 if (self->offsets == NULL)
286 return (const char *)PyErr_NoMemory();
288 return (const char *)PyErr_NoMemory();
287 inline_scan(self, self->offsets);
289 inline_scan(self, self->offsets);
288 }
290 }
289 return self->offsets[pos];
291 return self->offsets[pos];
290 }
292 }
291
293
292 return PyString_AS_STRING(self->data) + pos * 64;
294 return PyString_AS_STRING(self->data) + pos * 64;
293 }
295 }
294
296
295 /*
297 /*
296 * RevlogNG format (all in big endian, data may be inlined):
298 * RevlogNG format (all in big endian, data may be inlined):
297 * 6 bytes: offset
299 * 6 bytes: offset
298 * 2 bytes: flags
300 * 2 bytes: flags
299 * 4 bytes: compressed length
301 * 4 bytes: compressed length
300 * 4 bytes: uncompressed length
302 * 4 bytes: uncompressed length
301 * 4 bytes: base revision
303 * 4 bytes: base revision
302 * 4 bytes: link revision
304 * 4 bytes: link revision
303 * 4 bytes: parent 1 revision
305 * 4 bytes: parent 1 revision
304 * 4 bytes: parent 2 revision
306 * 4 bytes: parent 2 revision
305 * 32 bytes: nodeid (only 20 bytes used)
307 * 32 bytes: nodeid (only 20 bytes used)
306 */
308 */
307 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
308 {
310 {
309 uint64_t offset_flags;
311 uint64_t offset_flags;
310 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
311 const char *c_node_id;
313 const char *c_node_id;
312 const char *data;
314 const char *data;
313 Py_ssize_t length = index_length(self);
315 Py_ssize_t length = index_length(self);
314 PyObject *entry;
316 PyObject *entry;
315
317
316 if (pos < 0)
318 if (pos < 0)
317 pos += length;
319 pos += length;
318
320
319 if (pos < 0 || pos >= length) {
321 if (pos < 0 || pos >= length) {
320 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
321 return NULL;
323 return NULL;
322 }
324 }
323
325
324 if (pos == length - 1) {
326 if (pos == length - 1) {
325 Py_INCREF(nullentry);
327 Py_INCREF(nullentry);
326 return nullentry;
328 return nullentry;
327 }
329 }
328
330
329 if (pos >= self->length - 1) {
331 if (pos >= self->length - 1) {
330 PyObject *obj;
332 PyObject *obj;
331 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
332 Py_INCREF(obj);
334 Py_INCREF(obj);
333 return obj;
335 return obj;
334 }
336 }
335
337
336 if (self->cache) {
338 if (self->cache) {
337 if (self->cache[pos]) {
339 if (self->cache[pos]) {
338 Py_INCREF(self->cache[pos]);
340 Py_INCREF(self->cache[pos]);
339 return self->cache[pos];
341 return self->cache[pos];
340 }
342 }
341 } else {
343 } else {
342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
343 if (self->cache == NULL)
345 if (self->cache == NULL)
344 return PyErr_NoMemory();
346 return PyErr_NoMemory();
345 }
347 }
346
348
347 data = index_deref(self, pos);
349 data = index_deref(self, pos);
348 if (data == NULL)
350 if (data == NULL)
349 return NULL;
351 return NULL;
350
352
351 offset_flags = getbe32(data + 4);
353 offset_flags = getbe32(data + 4);
352 if (pos == 0) /* mask out version number for the first entry */
354 if (pos == 0) /* mask out version number for the first entry */
353 offset_flags &= 0xFFFF;
355 offset_flags &= 0xFFFF;
354 else {
356 else {
355 uint32_t offset_high = getbe32(data);
357 uint32_t offset_high = getbe32(data);
356 offset_flags |= ((uint64_t)offset_high) << 32;
358 offset_flags |= ((uint64_t)offset_high) << 32;
357 }
359 }
358
360
359 comp_len = getbe32(data + 8);
361 comp_len = getbe32(data + 8);
360 uncomp_len = getbe32(data + 12);
362 uncomp_len = getbe32(data + 12);
361 base_rev = getbe32(data + 16);
363 base_rev = getbe32(data + 16);
362 link_rev = getbe32(data + 20);
364 link_rev = getbe32(data + 20);
363 parent_1 = getbe32(data + 24);
365 parent_1 = getbe32(data + 24);
364 parent_2 = getbe32(data + 28);
366 parent_2 = getbe32(data + 28);
365 c_node_id = data + 32;
367 c_node_id = data + 32;
366
368
367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
368 uncomp_len, base_rev, link_rev,
370 uncomp_len, base_rev, link_rev,
369 parent_1, parent_2, c_node_id, 20);
371 parent_1, parent_2, c_node_id, 20);
370
372
371 if (entry)
373 if (entry)
372 PyObject_GC_UnTrack(entry);
374 PyObject_GC_UnTrack(entry);
373
375
374 self->cache[pos] = entry;
376 self->cache[pos] = entry;
375 Py_INCREF(entry);
377 Py_INCREF(entry);
376
378
377 return entry;
379 return entry;
378 }
380 }
379
381
380 /*
382 /*
381 * Return the 20-byte SHA of the node corresponding to the given rev.
383 * Return the 20-byte SHA of the node corresponding to the given rev.
382 */
384 */
383 static const char *index_node(indexObject *self, Py_ssize_t pos)
385 static const char *index_node(indexObject *self, Py_ssize_t pos)
384 {
386 {
385 Py_ssize_t length = index_length(self);
387 Py_ssize_t length = index_length(self);
386 const char *data;
388 const char *data;
387
389
388 if (pos == length - 1)
390 if (pos == length - 1)
389 return nullid;
391 return nullid;
390
392
391 if (pos >= length)
393 if (pos >= length)
392 return NULL;
394 return NULL;
393
395
394 if (pos >= self->length - 1) {
396 if (pos >= self->length - 1) {
395 PyObject *tuple, *str;
397 PyObject *tuple, *str;
396 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
397 str = PyTuple_GetItem(tuple, 7);
399 str = PyTuple_GetItem(tuple, 7);
398 return str ? PyString_AS_STRING(str) : NULL;
400 return str ? PyString_AS_STRING(str) : NULL;
399 }
401 }
400
402
401 data = index_deref(self, pos);
403 data = index_deref(self, pos);
402 return data ? data + 32 : NULL;
404 return data ? data + 32 : NULL;
403 }
405 }
404
406
405 static int nt_insert(indexObject *self, const char *node, int rev);
407 static int nt_insert(indexObject *self, const char *node, int rev);
406
408
407 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
408 {
410 {
409 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
410 return -1;
412 return -1;
411 if (*nodelen == 20)
413 if (*nodelen == 20)
412 return 0;
414 return 0;
413 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
414 return -1;
416 return -1;
415 }
417 }
416
418
417 static PyObject *index_insert(indexObject *self, PyObject *args)
419 static PyObject *index_insert(indexObject *self, PyObject *args)
418 {
420 {
419 PyObject *obj;
421 PyObject *obj;
420 char *node;
422 char *node;
421 long offset;
423 long offset;
422 Py_ssize_t len, nodelen;
424 Py_ssize_t len, nodelen;
423
425
424 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
425 return NULL;
427 return NULL;
426
428
427 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
428 PyErr_SetString(PyExc_TypeError, "8-tuple required");
430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
429 return NULL;
431 return NULL;
430 }
432 }
431
433
432 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
433 return NULL;
435 return NULL;
434
436
435 len = index_length(self);
437 len = index_length(self);
436
438
437 if (offset < 0)
439 if (offset < 0)
438 offset += len;
440 offset += len;
439
441
440 if (offset != len - 1) {
442 if (offset != len - 1) {
441 PyErr_SetString(PyExc_IndexError,
443 PyErr_SetString(PyExc_IndexError,
442 "insert only supported at index -1");
444 "insert only supported at index -1");
443 return NULL;
445 return NULL;
444 }
446 }
445
447
446 if (offset > INT_MAX) {
448 if (offset > INT_MAX) {
447 PyErr_SetString(PyExc_ValueError,
449 PyErr_SetString(PyExc_ValueError,
448 "currently only 2**31 revs supported");
450 "currently only 2**31 revs supported");
449 return NULL;
451 return NULL;
450 }
452 }
451
453
452 if (self->added == NULL) {
454 if (self->added == NULL) {
453 self->added = PyList_New(0);
455 self->added = PyList_New(0);
454 if (self->added == NULL)
456 if (self->added == NULL)
455 return NULL;
457 return NULL;
456 }
458 }
457
459
458 if (PyList_Append(self->added, obj) == -1)
460 if (PyList_Append(self->added, obj) == -1)
459 return NULL;
461 return NULL;
460
462
461 if (self->nt)
463 if (self->nt)
462 nt_insert(self, node, (int)offset);
464 nt_insert(self, node, (int)offset);
463
465
464 Py_RETURN_NONE;
466 Py_RETURN_NONE;
465 }
467 }
466
468
467 static void _index_clearcaches(indexObject *self)
469 static void _index_clearcaches(indexObject *self)
468 {
470 {
469 if (self->cache) {
471 if (self->cache) {
470 Py_ssize_t i;
472 Py_ssize_t i;
471
473
472 for (i = 0; i < self->raw_length; i++) {
474 for (i = 0; i < self->raw_length; i++) {
473 if (self->cache[i]) {
475 if (self->cache[i]) {
474 Py_DECREF(self->cache[i]);
476 Py_DECREF(self->cache[i]);
475 self->cache[i] = NULL;
477 self->cache[i] = NULL;
476 }
478 }
477 }
479 }
478 free(self->cache);
480 free(self->cache);
479 self->cache = NULL;
481 self->cache = NULL;
480 }
482 }
481 if (self->offsets) {
483 if (self->offsets) {
482 free(self->offsets);
484 free(self->offsets);
483 self->offsets = NULL;
485 self->offsets = NULL;
484 }
486 }
485 if (self->nt) {
487 if (self->nt) {
486 free(self->nt);
488 free(self->nt);
487 self->nt = NULL;
489 self->nt = NULL;
488 }
490 }
489 }
491 }
490
492
491 static PyObject *index_clearcaches(indexObject *self)
493 static PyObject *index_clearcaches(indexObject *self)
492 {
494 {
493 _index_clearcaches(self);
495 _index_clearcaches(self);
494 self->ntlength = self->ntcapacity = 0;
496 self->ntlength = self->ntcapacity = 0;
495 self->ntdepth = self->ntsplits = 0;
497 self->ntdepth = self->ntsplits = 0;
496 self->ntrev = -1;
498 self->ntrev = -1;
497 self->ntlookups = self->ntmisses = 0;
499 self->ntlookups = self->ntmisses = 0;
498 Py_RETURN_NONE;
500 Py_RETURN_NONE;
499 }
501 }
500
502
501 static PyObject *index_stats(indexObject *self)
503 static PyObject *index_stats(indexObject *self)
502 {
504 {
503 PyObject *obj = PyDict_New();
505 PyObject *obj = PyDict_New();
504
506
505 if (obj == NULL)
507 if (obj == NULL)
506 return NULL;
508 return NULL;
507
509
508 #define istat(__n, __d) \
510 #define istat(__n, __d) \
509 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
511 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
510 goto bail;
512 goto bail;
511
513
512 if (self->added) {
514 if (self->added) {
513 Py_ssize_t len = PyList_GET_SIZE(self->added);
515 Py_ssize_t len = PyList_GET_SIZE(self->added);
514 if (PyDict_SetItemString(obj, "index entries added",
516 if (PyDict_SetItemString(obj, "index entries added",
515 PyInt_FromLong(len)) == -1)
517 PyInt_FromLong(len)) == -1)
516 goto bail;
518 goto bail;
517 }
519 }
518
520
519 if (self->raw_length != self->length - 1)
521 if (self->raw_length != self->length - 1)
520 istat(raw_length, "revs on disk");
522 istat(raw_length, "revs on disk");
521 istat(length, "revs in memory");
523 istat(length, "revs in memory");
522 istat(ntcapacity, "node trie capacity");
524 istat(ntcapacity, "node trie capacity");
523 istat(ntdepth, "node trie depth");
525 istat(ntdepth, "node trie depth");
524 istat(ntlength, "node trie count");
526 istat(ntlength, "node trie count");
525 istat(ntlookups, "node trie lookups");
527 istat(ntlookups, "node trie lookups");
526 istat(ntmisses, "node trie misses");
528 istat(ntmisses, "node trie misses");
527 istat(ntrev, "node trie last rev scanned");
529 istat(ntrev, "node trie last rev scanned");
528 istat(ntsplits, "node trie splits");
530 istat(ntsplits, "node trie splits");
529
531
530 #undef istat
532 #undef istat
531
533
532 return obj;
534 return obj;
533
535
534 bail:
536 bail:
535 Py_XDECREF(obj);
537 Py_XDECREF(obj);
536 return NULL;
538 return NULL;
537 }
539 }
538
540
539 static inline int nt_level(const char *node, int level)
541 static inline int nt_level(const char *node, int level)
540 {
542 {
541 int v = node[level>>1];
543 int v = node[level>>1];
542 if (!(level & 1))
544 if (!(level & 1))
543 v >>= 4;
545 v >>= 4;
544 return v & 0xf;
546 return v & 0xf;
545 }
547 }
546
548
547 /*
549 /*
548 * Return values:
550 * Return values:
549 *
551 *
550 * -4: match is ambiguous (multiple candidates)
552 * -4: match is ambiguous (multiple candidates)
551 * -2: not found
553 * -2: not found
552 * rest: valid rev
554 * rest: valid rev
553 */
555 */
554 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
556 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
555 {
557 {
556 int level, off;
558 int level, off;
557
559
558 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
560 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
559 return -1;
561 return -1;
560
562
561 if (self->nt == NULL)
563 if (self->nt == NULL)
562 return -2;
564 return -2;
563
565
564 for (level = off = 0; level < nodelen; level++) {
566 for (level = off = 0; level < nodelen; level++) {
565 int k = nt_level(node, level);
567 int k = nt_level(node, level);
566 nodetree *n = &self->nt[off];
568 nodetree *n = &self->nt[off];
567 int v = n->children[k];
569 int v = n->children[k];
568
570
569 if (v < 0) {
571 if (v < 0) {
570 const char *n;
572 const char *n;
571 v = -v - 1;
573 v = -v - 1;
572 n = index_node(self, v);
574 n = index_node(self, v);
573 if (n == NULL)
575 if (n == NULL)
574 return -2;
576 return -2;
575 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
577 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
576 ? -2 : v;
578 ? -2 : v;
577 }
579 }
578 if (v == 0)
580 if (v == 0)
579 return -2;
581 return -2;
580 off = v;
582 off = v;
581 }
583 }
582 /* multiple matches against an ambiguous prefix */
584 /* multiple matches against an ambiguous prefix */
583 return -4;
585 return -4;
584 }
586 }
585
587
586 static int nt_new(indexObject *self)
588 static int nt_new(indexObject *self)
587 {
589 {
588 if (self->ntlength == self->ntcapacity) {
590 if (self->ntlength == self->ntcapacity) {
589 self->ntcapacity *= 2;
591 self->ntcapacity *= 2;
590 self->nt = realloc(self->nt,
592 self->nt = realloc(self->nt,
591 self->ntcapacity * sizeof(nodetree));
593 self->ntcapacity * sizeof(nodetree));
592 if (self->nt == NULL) {
594 if (self->nt == NULL) {
593 PyErr_SetString(PyExc_MemoryError, "out of memory");
595 PyErr_SetString(PyExc_MemoryError, "out of memory");
594 return -1;
596 return -1;
595 }
597 }
596 memset(&self->nt[self->ntlength], 0,
598 memset(&self->nt[self->ntlength], 0,
597 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
599 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
598 }
600 }
599 return self->ntlength++;
601 return self->ntlength++;
600 }
602 }
601
603
602 static int nt_insert(indexObject *self, const char *node, int rev)
604 static int nt_insert(indexObject *self, const char *node, int rev)
603 {
605 {
604 int level = 0;
606 int level = 0;
605 int off = 0;
607 int off = 0;
606
608
607 while (level < 20) {
609 while (level < 20) {
608 int k = nt_level(node, level);
610 int k = nt_level(node, level);
609 nodetree *n;
611 nodetree *n;
610 int v;
612 int v;
611
613
612 n = &self->nt[off];
614 n = &self->nt[off];
613 v = n->children[k];
615 v = n->children[k];
614
616
615 if (v == 0) {
617 if (v == 0) {
616 n->children[k] = -rev - 1;
618 n->children[k] = -rev - 1;
617 return 0;
619 return 0;
618 }
620 }
619 if (v < 0) {
621 if (v < 0) {
620 const char *oldnode = index_node(self, -v - 1);
622 const char *oldnode = index_node(self, -v - 1);
621 int noff;
623 int noff;
622
624
623 if (!oldnode || !memcmp(oldnode, node, 20)) {
625 if (!oldnode || !memcmp(oldnode, node, 20)) {
624 n->children[k] = -rev - 1;
626 n->children[k] = -rev - 1;
625 return 0;
627 return 0;
626 }
628 }
627 noff = nt_new(self);
629 noff = nt_new(self);
628 if (noff == -1)
630 if (noff == -1)
629 return -1;
631 return -1;
630 /* self->nt may have been changed by realloc */
632 /* self->nt may have been changed by realloc */
631 self->nt[off].children[k] = noff;
633 self->nt[off].children[k] = noff;
632 off = noff;
634 off = noff;
633 n = &self->nt[off];
635 n = &self->nt[off];
634 n->children[nt_level(oldnode, ++level)] = v;
636 n->children[nt_level(oldnode, ++level)] = v;
635 if (level > self->ntdepth)
637 if (level > self->ntdepth)
636 self->ntdepth = level;
638 self->ntdepth = level;
637 self->ntsplits += 1;
639 self->ntsplits += 1;
638 } else {
640 } else {
639 level += 1;
641 level += 1;
640 off = v;
642 off = v;
641 }
643 }
642 }
644 }
643
645
644 return -1;
646 return -1;
645 }
647 }
646
648
647 static int nt_init(indexObject *self)
649 static int nt_init(indexObject *self)
648 {
650 {
649 if (self->nt == NULL) {
651 if (self->nt == NULL) {
650 self->ntcapacity = self->raw_length < 4
652 self->ntcapacity = self->raw_length < 4
651 ? 4 : self->raw_length / 2;
653 ? 4 : self->raw_length / 2;
652 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
654 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
653 if (self->nt == NULL) {
655 if (self->nt == NULL) {
654 PyErr_NoMemory();
656 PyErr_NoMemory();
655 return -1;
657 return -1;
656 }
658 }
657 self->ntlength = 1;
659 self->ntlength = 1;
658 self->ntrev = (int)index_length(self) - 1;
660 self->ntrev = (int)index_length(self) - 1;
659 self->ntlookups = 1;
661 self->ntlookups = 1;
660 self->ntmisses = 0;
662 self->ntmisses = 0;
661 }
663 }
662 return 0;
664 return 0;
663 }
665 }
664
666
665 /*
667 /*
666 * Return values:
668 * Return values:
667 *
669 *
668 * -3: error (exception set)
670 * -3: error (exception set)
669 * -2: not found (no exception set)
671 * -2: not found (no exception set)
670 * rest: valid rev
672 * rest: valid rev
671 */
673 */
672 static int index_find_node(indexObject *self,
674 static int index_find_node(indexObject *self,
673 const char *node, Py_ssize_t nodelen)
675 const char *node, Py_ssize_t nodelen)
674 {
676 {
675 int rev;
677 int rev;
676
678
677 self->ntlookups++;
679 self->ntlookups++;
678 rev = nt_find(self, node, nodelen);
680 rev = nt_find(self, node, nodelen);
679 if (rev >= -1)
681 if (rev >= -1)
680 return rev;
682 return rev;
681
683
682 if (nt_init(self) == -1)
684 if (nt_init(self) == -1)
683 return -3;
685 return -3;
684
686
685 /*
687 /*
686 * For the first handful of lookups, we scan the entire index,
688 * For the first handful of lookups, we scan the entire index,
687 * and cache only the matching nodes. This optimizes for cases
689 * and cache only the matching nodes. This optimizes for cases
688 * like "hg tip", where only a few nodes are accessed.
690 * like "hg tip", where only a few nodes are accessed.
689 *
691 *
690 * After that, we cache every node we visit, using a single
692 * After that, we cache every node we visit, using a single
691 * scan amortized over multiple lookups. This gives the best
693 * scan amortized over multiple lookups. This gives the best
692 * bulk performance, e.g. for "hg log".
694 * bulk performance, e.g. for "hg log".
693 */
695 */
694 if (self->ntmisses++ < 4) {
696 if (self->ntmisses++ < 4) {
695 for (rev = self->ntrev - 1; rev >= 0; rev--) {
697 for (rev = self->ntrev - 1; rev >= 0; rev--) {
696 const char *n = index_node(self, rev);
698 const char *n = index_node(self, rev);
697 if (n == NULL)
699 if (n == NULL)
698 return -2;
700 return -2;
699 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
701 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
700 if (nt_insert(self, n, rev) == -1)
702 if (nt_insert(self, n, rev) == -1)
701 return -3;
703 return -3;
702 break;
704 break;
703 }
705 }
704 }
706 }
705 } else {
707 } else {
706 for (rev = self->ntrev - 1; rev >= 0; rev--) {
708 for (rev = self->ntrev - 1; rev >= 0; rev--) {
707 const char *n = index_node(self, rev);
709 const char *n = index_node(self, rev);
708 if (n == NULL) {
710 if (n == NULL) {
709 self->ntrev = rev + 1;
711 self->ntrev = rev + 1;
710 return -2;
712 return -2;
711 }
713 }
712 if (nt_insert(self, n, rev) == -1) {
714 if (nt_insert(self, n, rev) == -1) {
713 self->ntrev = rev + 1;
715 self->ntrev = rev + 1;
714 return -3;
716 return -3;
715 }
717 }
716 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
718 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
717 break;
719 break;
718 }
720 }
719 }
721 }
720 self->ntrev = rev;
722 self->ntrev = rev;
721 }
723 }
722
724
723 if (rev >= 0)
725 if (rev >= 0)
724 return rev;
726 return rev;
725 return -2;
727 return -2;
726 }
728 }
727
729
728 static PyObject *raise_revlog_error(void)
730 static PyObject *raise_revlog_error(void)
729 {
731 {
730 static PyObject *errclass;
732 static PyObject *errclass;
731 PyObject *mod = NULL, *errobj;
733 PyObject *mod = NULL, *errobj;
732
734
733 if (errclass == NULL) {
735 if (errclass == NULL) {
734 PyObject *dict;
736 PyObject *dict;
735
737
736 mod = PyImport_ImportModule("mercurial.error");
738 mod = PyImport_ImportModule("mercurial.error");
737 if (mod == NULL)
739 if (mod == NULL)
738 goto classfail;
740 goto classfail;
739
741
740 dict = PyModule_GetDict(mod);
742 dict = PyModule_GetDict(mod);
741 if (dict == NULL)
743 if (dict == NULL)
742 goto classfail;
744 goto classfail;
743
745
744 errclass = PyDict_GetItemString(dict, "RevlogError");
746 errclass = PyDict_GetItemString(dict, "RevlogError");
745 if (errclass == NULL) {
747 if (errclass == NULL) {
746 PyErr_SetString(PyExc_SystemError,
748 PyErr_SetString(PyExc_SystemError,
747 "could not find RevlogError");
749 "could not find RevlogError");
748 goto classfail;
750 goto classfail;
749 }
751 }
750 Py_INCREF(errclass);
752 Py_INCREF(errclass);
751 }
753 }
752
754
753 errobj = PyObject_CallFunction(errclass, NULL);
755 errobj = PyObject_CallFunction(errclass, NULL);
754 if (errobj == NULL)
756 if (errobj == NULL)
755 return NULL;
757 return NULL;
756 PyErr_SetObject(errclass, errobj);
758 PyErr_SetObject(errclass, errobj);
757 return errobj;
759 return errobj;
758
760
759 classfail:
761 classfail:
760 Py_XDECREF(mod);
762 Py_XDECREF(mod);
761 return NULL;
763 return NULL;
762 }
764 }
763
765
764 static PyObject *index_getitem(indexObject *self, PyObject *value)
766 static PyObject *index_getitem(indexObject *self, PyObject *value)
765 {
767 {
766 char *node;
768 char *node;
767 Py_ssize_t nodelen;
769 Py_ssize_t nodelen;
768 int rev;
770 int rev;
769
771
770 if (PyInt_Check(value))
772 if (PyInt_Check(value))
771 return index_get(self, PyInt_AS_LONG(value));
773 return index_get(self, PyInt_AS_LONG(value));
772
774
773 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
775 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
774 return NULL;
776 return NULL;
775 rev = index_find_node(self, node, nodelen);
777 rev = index_find_node(self, node, nodelen);
776 if (rev >= -1)
778 if (rev >= -1)
777 return PyInt_FromLong(rev);
779 return PyInt_FromLong(rev);
778 if (rev == -2)
780 if (rev == -2)
779 raise_revlog_error();
781 raise_revlog_error();
780 return NULL;
782 return NULL;
781 }
783 }
782
784
783 static PyObject *index_m_get(indexObject *self, PyObject *args)
785 static PyObject *index_m_get(indexObject *self, PyObject *args)
784 {
786 {
785 char *node;
787 char *node;
786 int nodelen, rev;
788 int nodelen, rev;
787
789
788 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
790 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
789 return NULL;
791 return NULL;
790
792
791 rev = index_find_node(self, node, nodelen);
793 rev = index_find_node(self, node, nodelen);
792 if (rev == -3)
794 if (rev == -3)
793 return NULL;
795 return NULL;
794 if (rev == -2)
796 if (rev == -2)
795 Py_RETURN_NONE;
797 Py_RETURN_NONE;
796 return PyInt_FromLong(rev);
798 return PyInt_FromLong(rev);
797 }
799 }
798
800
799 static int index_contains(indexObject *self, PyObject *value)
801 static int index_contains(indexObject *self, PyObject *value)
800 {
802 {
801 char *node;
803 char *node;
802 Py_ssize_t nodelen;
804 Py_ssize_t nodelen;
803
805
804 if (PyInt_Check(value)) {
806 if (PyInt_Check(value)) {
805 long rev = PyInt_AS_LONG(value);
807 long rev = PyInt_AS_LONG(value);
806 return rev >= -1 && rev < index_length(self);
808 return rev >= -1 && rev < index_length(self);
807 }
809 }
808
810
809 if (!PyString_Check(value))
811 if (!PyString_Check(value))
810 return 0;
812 return 0;
811
813
812 node = PyString_AS_STRING(value);
814 node = PyString_AS_STRING(value);
813 nodelen = PyString_GET_SIZE(value);
815 nodelen = PyString_GET_SIZE(value);
814
816
815 switch (index_find_node(self, node, nodelen)) {
817 switch (index_find_node(self, node, nodelen)) {
816 case -3:
818 case -3:
817 return -1;
819 return -1;
818 case -2:
820 case -2:
819 return 0;
821 return 0;
820 default:
822 default:
821 return 1;
823 return 1;
822 }
824 }
823 }
825 }
824
826
825 /*
827 /*
826 * Invalidate any trie entries introduced by added revs.
828 * Invalidate any trie entries introduced by added revs.
827 */
829 */
828 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
830 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
829 {
831 {
830 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
832 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
831
833
832 for (i = start; i < len; i++) {
834 for (i = start; i < len; i++) {
833 PyObject *tuple = PyList_GET_ITEM(self->added, i);
835 PyObject *tuple = PyList_GET_ITEM(self->added, i);
834 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
836 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
835
837
836 nt_insert(self, PyString_AS_STRING(node), -1);
838 nt_insert(self, PyString_AS_STRING(node), -1);
837 }
839 }
838
840
839 if (start == 0) {
841 if (start == 0) {
840 Py_DECREF(self->added);
842 Py_DECREF(self->added);
841 self->added = NULL;
843 self->added = NULL;
842 }
844 }
843 }
845 }
844
846
845 /*
847 /*
846 * Delete a numeric range of revs, which must be at the end of the
848 * Delete a numeric range of revs, which must be at the end of the
847 * range, but exclude the sentinel nullid entry.
849 * range, but exclude the sentinel nullid entry.
848 */
850 */
849 static int index_slice_del(indexObject *self, PyObject *item)
851 static int index_slice_del(indexObject *self, PyObject *item)
850 {
852 {
851 Py_ssize_t start, stop, step, slicelength;
853 Py_ssize_t start, stop, step, slicelength;
852 Py_ssize_t length = index_length(self);
854 Py_ssize_t length = index_length(self);
853
855
854 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
856 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
855 &start, &stop, &step, &slicelength) < 0)
857 &start, &stop, &step, &slicelength) < 0)
856 return -1;
858 return -1;
857
859
858 if (slicelength <= 0)
860 if (slicelength <= 0)
859 return 0;
861 return 0;
860
862
861 if ((step < 0 && start < stop) || (step > 0 && start > stop))
863 if ((step < 0 && start < stop) || (step > 0 && start > stop))
862 stop = start;
864 stop = start;
863
865
864 if (step < 0) {
866 if (step < 0) {
865 stop = start + 1;
867 stop = start + 1;
866 start = stop + step*(slicelength - 1) - 1;
868 start = stop + step*(slicelength - 1) - 1;
867 step = -step;
869 step = -step;
868 }
870 }
869
871
870 if (step != 1) {
872 if (step != 1) {
871 PyErr_SetString(PyExc_ValueError,
873 PyErr_SetString(PyExc_ValueError,
872 "revlog index delete requires step size of 1");
874 "revlog index delete requires step size of 1");
873 return -1;
875 return -1;
874 }
876 }
875
877
876 if (stop != length - 1) {
878 if (stop != length - 1) {
877 PyErr_SetString(PyExc_IndexError,
879 PyErr_SetString(PyExc_IndexError,
878 "revlog index deletion indices are invalid");
880 "revlog index deletion indices are invalid");
879 return -1;
881 return -1;
880 }
882 }
881
883
882 if (start < self->length - 1) {
884 if (start < self->length - 1) {
883 if (self->nt) {
885 if (self->nt) {
884 Py_ssize_t i;
886 Py_ssize_t i;
885
887
886 for (i = start + 1; i < self->length - 1; i++) {
888 for (i = start + 1; i < self->length - 1; i++) {
887 const char *node = index_node(self, i);
889 const char *node = index_node(self, i);
888
890
889 if (node)
891 if (node)
890 nt_insert(self, node, -1);
892 nt_insert(self, node, -1);
891 }
893 }
892 if (self->added)
894 if (self->added)
893 nt_invalidate_added(self, 0);
895 nt_invalidate_added(self, 0);
894 if (self->ntrev > start)
896 if (self->ntrev > start)
895 self->ntrev = (int)start;
897 self->ntrev = (int)start;
896 }
898 }
897 self->length = start + 1;
899 self->length = start + 1;
898 return 0;
900 return 0;
899 }
901 }
900
902
901 if (self->nt) {
903 if (self->nt) {
902 nt_invalidate_added(self, start - self->length + 1);
904 nt_invalidate_added(self, start - self->length + 1);
903 if (self->ntrev > start)
905 if (self->ntrev > start)
904 self->ntrev = (int)start;
906 self->ntrev = (int)start;
905 }
907 }
906 return self->added
908 return self->added
907 ? PyList_SetSlice(self->added, start - self->length + 1,
909 ? PyList_SetSlice(self->added, start - self->length + 1,
908 PyList_GET_SIZE(self->added), NULL)
910 PyList_GET_SIZE(self->added), NULL)
909 : 0;
911 : 0;
910 }
912 }
911
913
912 /*
914 /*
913 * Supported ops:
915 * Supported ops:
914 *
916 *
915 * slice deletion
917 * slice deletion
916 * string assignment (extend node->rev mapping)
918 * string assignment (extend node->rev mapping)
917 * string deletion (shrink node->rev mapping)
919 * string deletion (shrink node->rev mapping)
918 */
920 */
919 static int index_assign_subscript(indexObject *self, PyObject *item,
921 static int index_assign_subscript(indexObject *self, PyObject *item,
920 PyObject *value)
922 PyObject *value)
921 {
923 {
922 char *node;
924 char *node;
923 Py_ssize_t nodelen;
925 Py_ssize_t nodelen;
924 long rev;
926 long rev;
925
927
926 if (PySlice_Check(item) && value == NULL)
928 if (PySlice_Check(item) && value == NULL)
927 return index_slice_del(self, item);
929 return index_slice_del(self, item);
928
930
929 if (node_check(item, &node, &nodelen) == -1)
931 if (node_check(item, &node, &nodelen) == -1)
930 return -1;
932 return -1;
931
933
932 if (value == NULL)
934 if (value == NULL)
933 return self->nt ? nt_insert(self, node, -1) : 0;
935 return self->nt ? nt_insert(self, node, -1) : 0;
934 rev = PyInt_AsLong(value);
936 rev = PyInt_AsLong(value);
935 if (rev > INT_MAX || rev < 0) {
937 if (rev > INT_MAX || rev < 0) {
936 if (!PyErr_Occurred())
938 if (!PyErr_Occurred())
937 PyErr_SetString(PyExc_ValueError, "rev out of range");
939 PyErr_SetString(PyExc_ValueError, "rev out of range");
938 return -1;
940 return -1;
939 }
941 }
940 return nt_insert(self, node, (int)rev);
942 return nt_insert(self, node, (int)rev);
941 }
943 }
942
944
943 /*
945 /*
944 * Find all RevlogNG entries in an index that has inline data. Update
946 * Find all RevlogNG entries in an index that has inline data. Update
945 * the optional "offsets" table with those entries.
947 * the optional "offsets" table with those entries.
946 */
948 */
947 static long inline_scan(indexObject *self, const char **offsets)
949 static long inline_scan(indexObject *self, const char **offsets)
948 {
950 {
949 const char *data = PyString_AS_STRING(self->data);
951 const char *data = PyString_AS_STRING(self->data);
950 const char *end = data + PyString_GET_SIZE(self->data);
952 const char *end = data + PyString_GET_SIZE(self->data);
951 const long hdrsize = 64;
953 const long hdrsize = 64;
952 long incr = hdrsize;
954 long incr = hdrsize;
953 Py_ssize_t len = 0;
955 Py_ssize_t len = 0;
954
956
955 while (data + hdrsize <= end) {
957 while (data + hdrsize <= end) {
956 uint32_t comp_len;
958 uint32_t comp_len;
957 const char *old_data;
959 const char *old_data;
958 /* 3rd element of header is length of compressed inline data */
960 /* 3rd element of header is length of compressed inline data */
959 comp_len = getbe32(data + 8);
961 comp_len = getbe32(data + 8);
960 incr = hdrsize + comp_len;
962 incr = hdrsize + comp_len;
961 if (incr < hdrsize)
963 if (incr < hdrsize)
962 break;
964 break;
963 if (offsets)
965 if (offsets)
964 offsets[len] = data;
966 offsets[len] = data;
965 len++;
967 len++;
966 old_data = data;
968 old_data = data;
967 data += incr;
969 data += incr;
968 if (data <= old_data)
970 if (data <= old_data)
969 break;
971 break;
970 }
972 }
971
973
972 if (data != end && data + hdrsize != end) {
974 if (data != end && data + hdrsize != end) {
973 if (!PyErr_Occurred())
975 if (!PyErr_Occurred())
974 PyErr_SetString(PyExc_ValueError, "corrupt index file");
976 PyErr_SetString(PyExc_ValueError, "corrupt index file");
975 return -1;
977 return -1;
976 }
978 }
977
979
978 return len;
980 return len;
979 }
981 }
980
982
981 static int index_init(indexObject *self, PyObject *args)
983 static int index_init(indexObject *self, PyObject *args)
982 {
984 {
983 PyObject *data_obj, *inlined_obj;
985 PyObject *data_obj, *inlined_obj;
984 Py_ssize_t size;
986 Py_ssize_t size;
985
987
986 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
988 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
987 return -1;
989 return -1;
988 if (!PyString_Check(data_obj)) {
990 if (!PyString_Check(data_obj)) {
989 PyErr_SetString(PyExc_TypeError, "data is not a string");
991 PyErr_SetString(PyExc_TypeError, "data is not a string");
990 return -1;
992 return -1;
991 }
993 }
992 size = PyString_GET_SIZE(data_obj);
994 size = PyString_GET_SIZE(data_obj);
993
995
994 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
996 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
995 self->data = data_obj;
997 self->data = data_obj;
996 self->cache = NULL;
998 self->cache = NULL;
997
999
998 self->added = NULL;
1000 self->added = NULL;
999 self->offsets = NULL;
1001 self->offsets = NULL;
1000 self->nt = NULL;
1002 self->nt = NULL;
1001 self->ntlength = self->ntcapacity = 0;
1003 self->ntlength = self->ntcapacity = 0;
1002 self->ntdepth = self->ntsplits = 0;
1004 self->ntdepth = self->ntsplits = 0;
1003 self->ntlookups = self->ntmisses = 0;
1005 self->ntlookups = self->ntmisses = 0;
1004 self->ntrev = -1;
1006 self->ntrev = -1;
1005 Py_INCREF(self->data);
1007 Py_INCREF(self->data);
1006
1008
1007 if (self->inlined) {
1009 if (self->inlined) {
1008 long len = inline_scan(self, NULL);
1010 long len = inline_scan(self, NULL);
1009 if (len == -1)
1011 if (len == -1)
1010 goto bail;
1012 goto bail;
1011 self->raw_length = len;
1013 self->raw_length = len;
1012 self->length = len + 1;
1014 self->length = len + 1;
1013 } else {
1015 } else {
1014 if (size % 64) {
1016 if (size % 64) {
1015 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1017 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1016 goto bail;
1018 goto bail;
1017 }
1019 }
1018 self->raw_length = size / 64;
1020 self->raw_length = size / 64;
1019 self->length = self->raw_length + 1;
1021 self->length = self->raw_length + 1;
1020 }
1022 }
1021
1023
1022 return 0;
1024 return 0;
1023 bail:
1025 bail:
1024 return -1;
1026 return -1;
1025 }
1027 }
1026
1028
1027 static PyObject *index_nodemap(indexObject *self)
1029 static PyObject *index_nodemap(indexObject *self)
1028 {
1030 {
1029 Py_INCREF(self);
1031 Py_INCREF(self);
1030 return (PyObject *)self;
1032 return (PyObject *)self;
1031 }
1033 }
1032
1034
1033 static void index_dealloc(indexObject *self)
1035 static void index_dealloc(indexObject *self)
1034 {
1036 {
1035 _index_clearcaches(self);
1037 _index_clearcaches(self);
1036 Py_DECREF(self->data);
1038 Py_DECREF(self->data);
1037 Py_XDECREF(self->added);
1039 Py_XDECREF(self->added);
1038 PyObject_Del(self);
1040 PyObject_Del(self);
1039 }
1041 }
1040
1042
1041 static PySequenceMethods index_sequence_methods = {
1043 static PySequenceMethods index_sequence_methods = {
1042 (lenfunc)index_length, /* sq_length */
1044 (lenfunc)index_length, /* sq_length */
1043 0, /* sq_concat */
1045 0, /* sq_concat */
1044 0, /* sq_repeat */
1046 0, /* sq_repeat */
1045 (ssizeargfunc)index_get, /* sq_item */
1047 (ssizeargfunc)index_get, /* sq_item */
1046 0, /* sq_slice */
1048 0, /* sq_slice */
1047 0, /* sq_ass_item */
1049 0, /* sq_ass_item */
1048 0, /* sq_ass_slice */
1050 0, /* sq_ass_slice */
1049 (objobjproc)index_contains, /* sq_contains */
1051 (objobjproc)index_contains, /* sq_contains */
1050 };
1052 };
1051
1053
1052 static PyMappingMethods index_mapping_methods = {
1054 static PyMappingMethods index_mapping_methods = {
1053 (lenfunc)index_length, /* mp_length */
1055 (lenfunc)index_length, /* mp_length */
1054 (binaryfunc)index_getitem, /* mp_subscript */
1056 (binaryfunc)index_getitem, /* mp_subscript */
1055 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1057 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1056 };
1058 };
1057
1059
1058 static PyMethodDef index_methods[] = {
1060 static PyMethodDef index_methods[] = {
1059 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1061 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1060 "clear the index caches"},
1062 "clear the index caches"},
1061 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1063 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1062 "get an index entry"},
1064 "get an index entry"},
1063 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1065 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1064 "insert an index entry"},
1066 "insert an index entry"},
1065 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1067 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1066 "stats for the index"},
1068 "stats for the index"},
1067 {NULL} /* Sentinel */
1069 {NULL} /* Sentinel */
1068 };
1070 };
1069
1071
1070 static PyGetSetDef index_getset[] = {
1072 static PyGetSetDef index_getset[] = {
1071 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1073 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1072 {NULL} /* Sentinel */
1074 {NULL} /* Sentinel */
1073 };
1075 };
1074
1076
1075 static PyTypeObject indexType = {
1077 static PyTypeObject indexType = {
1076 PyObject_HEAD_INIT(NULL)
1078 PyObject_HEAD_INIT(NULL)
1077 0, /* ob_size */
1079 0, /* ob_size */
1078 "parsers.index", /* tp_name */
1080 "parsers.index", /* tp_name */
1079 sizeof(indexObject), /* tp_basicsize */
1081 sizeof(indexObject), /* tp_basicsize */
1080 0, /* tp_itemsize */
1082 0, /* tp_itemsize */
1081 (destructor)index_dealloc, /* tp_dealloc */
1083 (destructor)index_dealloc, /* tp_dealloc */
1082 0, /* tp_print */
1084 0, /* tp_print */
1083 0, /* tp_getattr */
1085 0, /* tp_getattr */
1084 0, /* tp_setattr */
1086 0, /* tp_setattr */
1085 0, /* tp_compare */
1087 0, /* tp_compare */
1086 0, /* tp_repr */
1088 0, /* tp_repr */
1087 0, /* tp_as_number */
1089 0, /* tp_as_number */
1088 &index_sequence_methods, /* tp_as_sequence */
1090 &index_sequence_methods, /* tp_as_sequence */
1089 &index_mapping_methods, /* tp_as_mapping */
1091 &index_mapping_methods, /* tp_as_mapping */
1090 0, /* tp_hash */
1092 0, /* tp_hash */
1091 0, /* tp_call */
1093 0, /* tp_call */
1092 0, /* tp_str */
1094 0, /* tp_str */
1093 0, /* tp_getattro */
1095 0, /* tp_getattro */
1094 0, /* tp_setattro */
1096 0, /* tp_setattro */
1095 0, /* tp_as_buffer */
1097 0, /* tp_as_buffer */
1096 Py_TPFLAGS_DEFAULT, /* tp_flags */
1098 Py_TPFLAGS_DEFAULT, /* tp_flags */
1097 "revlog index", /* tp_doc */
1099 "revlog index", /* tp_doc */
1098 0, /* tp_traverse */
1100 0, /* tp_traverse */
1099 0, /* tp_clear */
1101 0, /* tp_clear */
1100 0, /* tp_richcompare */
1102 0, /* tp_richcompare */
1101 0, /* tp_weaklistoffset */
1103 0, /* tp_weaklistoffset */
1102 0, /* tp_iter */
1104 0, /* tp_iter */
1103 0, /* tp_iternext */
1105 0, /* tp_iternext */
1104 index_methods, /* tp_methods */
1106 index_methods, /* tp_methods */
1105 0, /* tp_members */
1107 0, /* tp_members */
1106 index_getset, /* tp_getset */
1108 index_getset, /* tp_getset */
1107 0, /* tp_base */
1109 0, /* tp_base */
1108 0, /* tp_dict */
1110 0, /* tp_dict */
1109 0, /* tp_descr_get */
1111 0, /* tp_descr_get */
1110 0, /* tp_descr_set */
1112 0, /* tp_descr_set */
1111 0, /* tp_dictoffset */
1113 0, /* tp_dictoffset */
1112 (initproc)index_init, /* tp_init */
1114 (initproc)index_init, /* tp_init */
1113 0, /* tp_alloc */
1115 0, /* tp_alloc */
1114 PyType_GenericNew, /* tp_new */
1116 PyType_GenericNew, /* tp_new */
1115 };
1117 };
1116
1118
1117 /*
1119 /*
1118 * returns a tuple of the form (index, index, cache) with elements as
1120 * returns a tuple of the form (index, index, cache) with elements as
1119 * follows:
1121 * follows:
1120 *
1122 *
1121 * index: an index object that lazily parses RevlogNG records
1123 * index: an index object that lazily parses RevlogNG records
1122 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1124 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1123 *
1125 *
1124 * added complications are for backwards compatibility
1126 * added complications are for backwards compatibility
1125 */
1127 */
1126 static PyObject *parse_index2(PyObject *self, PyObject *args)
1128 static PyObject *parse_index2(PyObject *self, PyObject *args)
1127 {
1129 {
1128 PyObject *tuple = NULL, *cache = NULL;
1130 PyObject *tuple = NULL, *cache = NULL;
1129 indexObject *idx;
1131 indexObject *idx;
1130 int ret;
1132 int ret;
1131
1133
1132 idx = PyObject_New(indexObject, &indexType);
1134 idx = PyObject_New(indexObject, &indexType);
1133 if (idx == NULL)
1135 if (idx == NULL)
1134 goto bail;
1136 goto bail;
1135
1137
1136 ret = index_init(idx, args);
1138 ret = index_init(idx, args);
1137 if (ret == -1)
1139 if (ret == -1)
1138 goto bail;
1140 goto bail;
1139
1141
1140 if (idx->inlined) {
1142 if (idx->inlined) {
1141 cache = Py_BuildValue("iO", 0, idx->data);
1143 cache = Py_BuildValue("iO", 0, idx->data);
1142 if (cache == NULL)
1144 if (cache == NULL)
1143 goto bail;
1145 goto bail;
1144 } else {
1146 } else {
1145 cache = Py_None;
1147 cache = Py_None;
1146 Py_INCREF(cache);
1148 Py_INCREF(cache);
1147 }
1149 }
1148
1150
1149 tuple = Py_BuildValue("NN", idx, cache);
1151 tuple = Py_BuildValue("NN", idx, cache);
1150 if (!tuple)
1152 if (!tuple)
1151 goto bail;
1153 goto bail;
1152 return tuple;
1154 return tuple;
1153
1155
1154 bail:
1156 bail:
1155 Py_XDECREF(idx);
1157 Py_XDECREF(idx);
1156 Py_XDECREF(cache);
1158 Py_XDECREF(cache);
1157 Py_XDECREF(tuple);
1159 Py_XDECREF(tuple);
1158 return NULL;
1160 return NULL;
1159 }
1161 }
1160
1162
1161 static char parsers_doc[] = "Efficient content parsing.";
1163 static char parsers_doc[] = "Efficient content parsing.";
1162
1164
1163 static PyMethodDef methods[] = {
1165 static PyMethodDef methods[] = {
1164 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1166 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1165 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1167 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1166 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1168 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1167 {NULL, NULL}
1169 {NULL, NULL}
1168 };
1170 };
1169
1171
1170 static void module_init(PyObject *mod)
1172 static void module_init(PyObject *mod)
1171 {
1173 {
1172 if (PyType_Ready(&indexType) < 0)
1174 if (PyType_Ready(&indexType) < 0)
1173 return;
1175 return;
1174 Py_INCREF(&indexType);
1176 Py_INCREF(&indexType);
1175
1177
1176 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1178 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1177
1179
1178 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1180 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1179 -1, -1, -1, -1, nullid, 20);
1181 -1, -1, -1, -1, nullid, 20);
1180 if (nullentry)
1182 if (nullentry)
1181 PyObject_GC_UnTrack(nullentry);
1183 PyObject_GC_UnTrack(nullentry);
1182 }
1184 }
1183
1185
1184 #ifdef IS_PY3K
1186 #ifdef IS_PY3K
1185 static struct PyModuleDef parsers_module = {
1187 static struct PyModuleDef parsers_module = {
1186 PyModuleDef_HEAD_INIT,
1188 PyModuleDef_HEAD_INIT,
1187 "parsers",
1189 "parsers",
1188 parsers_doc,
1190 parsers_doc,
1189 -1,
1191 -1,
1190 methods
1192 methods
1191 };
1193 };
1192
1194
1193 PyMODINIT_FUNC PyInit_parsers(void)
1195 PyMODINIT_FUNC PyInit_parsers(void)
1194 {
1196 {
1195 PyObject *mod = PyModule_Create(&parsers_module);
1197 PyObject *mod = PyModule_Create(&parsers_module);
1196 module_init(mod);
1198 module_init(mod);
1197 return mod;
1199 return mod;
1198 }
1200 }
1199 #else
1201 #else
1200 PyMODINIT_FUNC initparsers(void)
1202 PyMODINIT_FUNC initparsers(void)
1201 {
1203 {
1202 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1204 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1203 module_init(mod);
1205 module_init(mod);
1204 }
1206 }
1205 #endif
1207 #endif
General Comments 0
You need to be logged in to leave comments. Login now