##// END OF EJS Templates
Mercurial changeset r16665:e410be86 (branch: default), by Bryan O'Sullivan —
"revlog: speed up prefix matching against nodes".
Diff of mercurial/parsers.c (1220 -> 1293 lines); each source line below
appears twice (old/new diff sides) as rendered by the review tool.
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
16 static inline int hexdigit(const char *p, Py_ssize_t off)
16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 {
17 {
18 char c = p[off];
18 char c = p[off];
19
19
20 if (c >= '0' && c <= '9')
20 if (c >= '0' && c <= '9')
21 return c - '0';
21 return c - '0';
22 if (c >= 'a' && c <= 'f')
22 if (c >= 'a' && c <= 'f')
23 return c - 'a' + 10;
23 return c - 'a' + 10;
24 if (c >= 'A' && c <= 'F')
24 if (c >= 'A' && c <= 'F')
25 return c - 'A' + 10;
25 return c - 'A' + 10;
26
26
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 return 0;
28 return 0;
29 }
29 }
30
30
31 /*
31 /*
32 * Turn a hex-encoded string into binary.
32 * Turn a hex-encoded string into binary.
33 */
33 */
34 static PyObject *unhexlify(const char *str, int len)
34 static PyObject *unhexlify(const char *str, int len)
35 {
35 {
36 PyObject *ret;
36 PyObject *ret;
37 char *d;
37 char *d;
38 int i;
38 int i;
39
39
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41
41
42 if (!ret)
42 if (!ret)
43 return NULL;
43 return NULL;
44
44
45 d = PyBytes_AsString(ret);
45 d = PyBytes_AsString(ret);
46
46
47 for (i = 0; i < len;) {
47 for (i = 0; i < len;) {
48 int hi = hexdigit(str, i++);
48 int hi = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
50 *d++ = (hi << 4) | lo;
50 *d++ = (hi << 4) | lo;
51 }
51 }
52
52
53 return ret;
53 return ret;
54 }
54 }
55
55
/*
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 *
 * Parse a manifest into mfdict (filename -> binary node) and fdict
 * (filename -> flags).  Each entry has the form
 * "<filename>\0<40 hex digits>[<flags>]\n".  Returns None on success,
 * NULL with an exception set on error.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *cur, *start, *zero;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
			      &PyDict_Type, &mfdict,
			      &PyDict_Type, &fdict,
			      &str, &len))
		goto quit;

	/* Remember each NUL separator; process the entry once its
	   terminating newline is seen. */
	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
		PyObject *file = NULL, *node = NULL;
		PyObject *flags = NULL;
		int nlen;

		if (!*cur) {
			zero = cur;
			continue;
		}
		else if (*cur != '\n')
			continue;

		if (!zero) {
			PyErr_SetString(PyExc_ValueError,
					"manifest entry has no separator");
			goto quit;
		}

		file = PyBytes_FromStringAndSize(start, zero - start);

		if (!file)
			goto bail;

		/* bytes between separator and newline: hex node + flags */
		nlen = cur - zero - 1;

		/* only the first 40 hex digits form the node id */
		node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
		if (!node)
			goto bail;

		/* anything beyond the node id is per-file flags */
		if (nlen > 40) {
			flags = PyBytes_FromStringAndSize(zero + 41,
							  nlen - 40);
			if (!flags)
				goto bail;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto bail;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto bail;

		start = cur + 1;
		zero = NULL;

		/* success path: drop this entry's temporary refs */
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		continue;
	bail:
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		goto quit;
	}

	/* a non-empty manifest must end with a newline */
	if (len > 0 && *(cur - 1) != '\n') {
		PyErr_SetString(PyExc_ValueError,
				"manifest contains trailing garbage");
		goto quit;
	}

	Py_INCREF(Py_None);
	return Py_None;
quit:
	return NULL;
}
139
139
/*
 * Parse a dirstate file.
 *
 * Fills dmap (filename -> (state, mode, size, mtime) tuple) and cmap
 * (filename -> copy source), and returns the (p1, p2) parents tuple,
 * or NULL with an exception set on error.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char *str, *cur, *end, *cpos;
	int state, mode, size, mtime;
	unsigned int flen;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
			      &PyDict_Type, &dmap,
			      &PyDict_Type, &cmap,
			      &str, &len))
		goto quit;

	/* read parents: two 20-byte binary node ids at the front */
	if (len < 40)
		goto quit;

	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filenames */
	cur = str + 40;
	end = str + len;

	/* each record: 17-byte fixed header, then flen bytes of name */
	while (cur < end - 17) {
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		cur += 17;
		/* second clause catches unsigned wraparound of cur + flen */
		if (cur + flen > end || cur + flen < cur) {
			PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
			goto quit;
		}

		entry = Py_BuildValue("ciii", state, mode, size, mtime);
		if (!entry)
			goto quit;
		PyObject_GC_UnTrack(entry); /* don't waste time with this */

		/* an embedded NUL separates filename from its copy source */
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(cpos + 1,
							  flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		cur += flen;
		/* dicts hold their own refs now; drop ours */
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	/* on error these may still hold this iteration's refs */
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
216
216
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	/* one slot per hex nibble; encoding as described above */
	int children[16];
} nodetree;
227
227
/*
 * This class has two behaviours.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;       /* raw bytes of index */
	PyObject **cache;     /* cached tuples */
	const char **offsets; /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	nodetree *nt;          /* base-16 trie */
	int ntlength;          /* # nodes in use */
	int ntcapacity;        /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* nonzero if data is an inline revlog */
} indexObject;
259
259
260 static Py_ssize_t index_length(const indexObject *self)
260 static Py_ssize_t index_length(const indexObject *self)
261 {
261 {
262 if (self->added == NULL)
262 if (self->added == NULL)
263 return self->length;
263 return self->length;
264 return self->length + PyList_GET_SIZE(self->added);
264 return self->length + PyList_GET_SIZE(self->added);
265 }
265 }
266
266
/* sentinel tuple for the nullid entry at the end of the index */
static PyObject *nullentry;
/* the all-zero node id of the null revision */
static const char nullid[20];

static long inline_scan(indexObject *self, const char **offsets);

/* Py_BuildValue format for an index entry tuple.  The first field is
   the 64-bit offset/flags word, so use 'K' (unsigned long long) when
   long is only 32 bits wide, 'k' (unsigned long) otherwise. */
#if LONG_MAX == 0x7fffffffL
static char *tuple_format = "Kiiiiiis#";
#else
static char *tuple_format = "kiiiiiis#";
#endif
277
277
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * For inlined revlogs, record starts are irregular (data follows each
 * entry), so an offset table is built lazily on first access.
 * Returns NULL with MemoryError set if the table cannot be allocated.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			self->offsets = malloc(self->raw_length *
					       sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			/* NOTE(review): inline_scan's return value is not
			   checked; presumably it cannot fail on data that
			   parsed once already — confirm. */
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	/* non-inlined indexes have fixed-size 64-byte records */
	return PyString_AS_STRING(self->data) + pos * 64;
}
296
296
297 /*
297 /*
298 * RevlogNG format (all in big endian, data may be inlined):
298 * RevlogNG format (all in big endian, data may be inlined):
299 * 6 bytes: offset
299 * 6 bytes: offset
300 * 2 bytes: flags
300 * 2 bytes: flags
301 * 4 bytes: compressed length
301 * 4 bytes: compressed length
302 * 4 bytes: uncompressed length
302 * 4 bytes: uncompressed length
303 * 4 bytes: base revision
303 * 4 bytes: base revision
304 * 4 bytes: link revision
304 * 4 bytes: link revision
305 * 4 bytes: parent 1 revision
305 * 4 bytes: parent 1 revision
306 * 4 bytes: parent 2 revision
306 * 4 bytes: parent 2 revision
307 * 32 bytes: nodeid (only 20 bytes used)
307 * 32 bytes: nodeid (only 20 bytes used)
308 */
308 */
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 {
310 {
311 uint64_t offset_flags;
311 uint64_t offset_flags;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 const char *c_node_id;
313 const char *c_node_id;
314 const char *data;
314 const char *data;
315 Py_ssize_t length = index_length(self);
315 Py_ssize_t length = index_length(self);
316 PyObject *entry;
316 PyObject *entry;
317
317
318 if (pos < 0)
318 if (pos < 0)
319 pos += length;
319 pos += length;
320
320
321 if (pos < 0 || pos >= length) {
321 if (pos < 0 || pos >= length) {
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
323 return NULL;
323 return NULL;
324 }
324 }
325
325
326 if (pos == length - 1) {
326 if (pos == length - 1) {
327 Py_INCREF(nullentry);
327 Py_INCREF(nullentry);
328 return nullentry;
328 return nullentry;
329 }
329 }
330
330
331 if (pos >= self->length - 1) {
331 if (pos >= self->length - 1) {
332 PyObject *obj;
332 PyObject *obj;
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
334 Py_INCREF(obj);
334 Py_INCREF(obj);
335 return obj;
335 return obj;
336 }
336 }
337
337
338 if (self->cache) {
338 if (self->cache) {
339 if (self->cache[pos]) {
339 if (self->cache[pos]) {
340 Py_INCREF(self->cache[pos]);
340 Py_INCREF(self->cache[pos]);
341 return self->cache[pos];
341 return self->cache[pos];
342 }
342 }
343 } else {
343 } else {
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
345 if (self->cache == NULL)
345 if (self->cache == NULL)
346 return PyErr_NoMemory();
346 return PyErr_NoMemory();
347 }
347 }
348
348
349 data = index_deref(self, pos);
349 data = index_deref(self, pos);
350 if (data == NULL)
350 if (data == NULL)
351 return NULL;
351 return NULL;
352
352
353 offset_flags = getbe32(data + 4);
353 offset_flags = getbe32(data + 4);
354 if (pos == 0) /* mask out version number for the first entry */
354 if (pos == 0) /* mask out version number for the first entry */
355 offset_flags &= 0xFFFF;
355 offset_flags &= 0xFFFF;
356 else {
356 else {
357 uint32_t offset_high = getbe32(data);
357 uint32_t offset_high = getbe32(data);
358 offset_flags |= ((uint64_t)offset_high) << 32;
358 offset_flags |= ((uint64_t)offset_high) << 32;
359 }
359 }
360
360
361 comp_len = getbe32(data + 8);
361 comp_len = getbe32(data + 8);
362 uncomp_len = getbe32(data + 12);
362 uncomp_len = getbe32(data + 12);
363 base_rev = getbe32(data + 16);
363 base_rev = getbe32(data + 16);
364 link_rev = getbe32(data + 20);
364 link_rev = getbe32(data + 20);
365 parent_1 = getbe32(data + 24);
365 parent_1 = getbe32(data + 24);
366 parent_2 = getbe32(data + 28);
366 parent_2 = getbe32(data + 28);
367 c_node_id = data + 32;
367 c_node_id = data + 32;
368
368
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
370 uncomp_len, base_rev, link_rev,
370 uncomp_len, base_rev, link_rev,
371 parent_1, parent_2, c_node_id, 20);
371 parent_1, parent_2, c_node_id, 20);
372
372
373 if (entry)
373 if (entry)
374 PyObject_GC_UnTrack(entry);
374 PyObject_GC_UnTrack(entry);
375
375
376 self->cache[pos] = entry;
376 self->cache[pos] = entry;
377 Py_INCREF(entry);
377 Py_INCREF(entry);
378
378
379 return entry;
379 return entry;
380 }
380 }
381
381
/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 *
 * Returns nullid for the sentinel position (or the INT_MAX marker
 * used by nt_init), NULL if pos is out of range or the entry cannot
 * be read.  The pointer refers into index data and is only valid
 * while the index (or added tuple) is alive.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == length - 1 || pos == INT_MAX)
		return nullid;

	if (pos >= length)
		return NULL;

	/* in-memory entries keep their node as tuple item 7 */
	if (pos >= self->length - 1) {
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyString_AS_STRING(str) : NULL;
	}

	/* on-disk entries store the node id at byte offset 32 */
	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}
406
406
407 static int nt_insert(indexObject *self, const char *node, int rev);
407 static int nt_insert(indexObject *self, const char *node, int rev);
408
408
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
410 {
410 {
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
412 return -1;
412 return -1;
413 if (*nodelen == 20)
413 if (*nodelen == 20)
414 return 0;
414 return 0;
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
416 return -1;
416 return -1;
417 }
417 }
418
418
/*
 * index.insert(offset, entry): append an 8-tuple entry to the index.
 *
 * Only insertion immediately before the nullid sentinel (index -1) is
 * supported.  The new node is also added to the trie if one has been
 * built.  Returns None, or NULL with an exception set.
 */
static PyObject *index_insert(indexObject *self, PyObject *args)
{
	PyObject *obj;
	char *node;
	long offset;
	Py_ssize_t len, nodelen;

	if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
		return NULL;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* item 7 must be the 20-byte binary node */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
		return NULL;

	len = index_length(self);

	if (offset < 0)
		offset += len;

	/* only appends right before the sentinel are supported */
	if (offset != len - 1) {
		PyErr_SetString(PyExc_IndexError,
				"insert only supported at index -1");
		return NULL;
	}

	/* revs are stored in the trie as ints; see nodetree encoding */
	if (offset > INT_MAX) {
		PyErr_SetString(PyExc_ValueError,
				"currently only 2**31 revs supported");
		return NULL;
	}

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	/* keep the lazy node->rev trie in sync if it exists */
	if (self->nt)
		nt_insert(self, node, (int)offset);

	Py_RETURN_NONE;
}
468
468
469 static void _index_clearcaches(indexObject *self)
469 static void _index_clearcaches(indexObject *self)
470 {
470 {
471 if (self->cache) {
471 if (self->cache) {
472 Py_ssize_t i;
472 Py_ssize_t i;
473
473
474 for (i = 0; i < self->raw_length; i++) {
474 for (i = 0; i < self->raw_length; i++) {
475 if (self->cache[i]) {
475 if (self->cache[i]) {
476 Py_DECREF(self->cache[i]);
476 Py_DECREF(self->cache[i]);
477 self->cache[i] = NULL;
477 self->cache[i] = NULL;
478 }
478 }
479 }
479 }
480 free(self->cache);
480 free(self->cache);
481 self->cache = NULL;
481 self->cache = NULL;
482 }
482 }
483 if (self->offsets) {
483 if (self->offsets) {
484 free(self->offsets);
484 free(self->offsets);
485 self->offsets = NULL;
485 self->offsets = NULL;
486 }
486 }
487 if (self->nt) {
487 if (self->nt) {
488 free(self->nt);
488 free(self->nt);
489 self->nt = NULL;
489 self->nt = NULL;
490 }
490 }
491 }
491 }
492
492
493 static PyObject *index_clearcaches(indexObject *self)
493 static PyObject *index_clearcaches(indexObject *self)
494 {
494 {
495 _index_clearcaches(self);
495 _index_clearcaches(self);
496 self->ntlength = self->ntcapacity = 0;
496 self->ntlength = self->ntcapacity = 0;
497 self->ntdepth = self->ntsplits = 0;
497 self->ntdepth = self->ntsplits = 0;
498 self->ntrev = -1;
498 self->ntrev = -1;
499 self->ntlookups = self->ntmisses = 0;
499 self->ntlookups = self->ntmisses = 0;
500 Py_RETURN_NONE;
500 Py_RETURN_NONE;
501 }
501 }
502
502
503 static PyObject *index_stats(indexObject *self)
503 static PyObject *index_stats(indexObject *self)
504 {
504 {
505 PyObject *obj = PyDict_New();
505 PyObject *obj = PyDict_New();
506
506
507 if (obj == NULL)
507 if (obj == NULL)
508 return NULL;
508 return NULL;
509
509
510 #define istat(__n, __d) \
510 #define istat(__n, __d) \
511 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
511 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
512 goto bail;
512 goto bail;
513
513
514 if (self->added) {
514 if (self->added) {
515 Py_ssize_t len = PyList_GET_SIZE(self->added);
515 Py_ssize_t len = PyList_GET_SIZE(self->added);
516 if (PyDict_SetItemString(obj, "index entries added",
516 if (PyDict_SetItemString(obj, "index entries added",
517 PyInt_FromSsize_t(len)) == -1)
517 PyInt_FromSsize_t(len)) == -1)
518 goto bail;
518 goto bail;
519 }
519 }
520
520
521 if (self->raw_length != self->length - 1)
521 if (self->raw_length != self->length - 1)
522 istat(raw_length, "revs on disk");
522 istat(raw_length, "revs on disk");
523 istat(length, "revs in memory");
523 istat(length, "revs in memory");
524 istat(ntcapacity, "node trie capacity");
524 istat(ntcapacity, "node trie capacity");
525 istat(ntdepth, "node trie depth");
525 istat(ntdepth, "node trie depth");
526 istat(ntlength, "node trie count");
526 istat(ntlength, "node trie count");
527 istat(ntlookups, "node trie lookups");
527 istat(ntlookups, "node trie lookups");
528 istat(ntmisses, "node trie misses");
528 istat(ntmisses, "node trie misses");
529 istat(ntrev, "node trie last rev scanned");
529 istat(ntrev, "node trie last rev scanned");
530 istat(ntsplits, "node trie splits");
530 istat(ntsplits, "node trie splits");
531
531
532 #undef istat
532 #undef istat
533
533
534 return obj;
534 return obj;
535
535
536 bail:
536 bail:
537 Py_XDECREF(obj);
537 Py_XDECREF(obj);
538 return NULL;
538 return NULL;
539 }
539 }
540
540
541 static inline int nt_level(const char *node, Py_ssize_t level)
541 static inline int nt_level(const char *node, Py_ssize_t level)
542 {
542 {
543 int v = node[level>>1];
543 int v = node[level>>1];
544 if (!(level & 1))
544 if (!(level & 1))
545 v >>= 4;
545 v >>= 4;
546 return v & 0xf;
546 return v & 0xf;
547 }
547 }
548
548
/*
 * Look up a (possibly partial) node in the trie.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 *
 * (Also returns -1 for the full binary nullid.)  `hex` selects
 * whether `node` is hex digits or raw binary and picks the matching
 * nibble extractor.
 */
static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
		   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* node[0] check is a cheap pre-filter before the full memcmp */
	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (self->nt == NULL)
		return -2;

	/* maxlevel = number of nibbles to walk (a full node is 40) */
	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetree *n = &self->nt[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf: verify the remaining nibbles match the
			   stored node before declaring a hit */
			const char *n;
			Py_ssize_t i;

			v = -v - 1;
			n = index_node(self, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* interior node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
598
598
599 static int nt_new(indexObject *self)
599 static int nt_new(indexObject *self)
600 {
600 {
601 if (self->ntlength == self->ntcapacity) {
601 if (self->ntlength == self->ntcapacity) {
602 self->ntcapacity *= 2;
602 self->ntcapacity *= 2;
603 self->nt = realloc(self->nt,
603 self->nt = realloc(self->nt,
604 self->ntcapacity * sizeof(nodetree));
604 self->ntcapacity * sizeof(nodetree));
605 if (self->nt == NULL) {
605 if (self->nt == NULL) {
606 PyErr_SetString(PyExc_MemoryError, "out of memory");
606 PyErr_SetString(PyExc_MemoryError, "out of memory");
607 return -1;
607 return -1;
608 }
608 }
609 memset(&self->nt[self->ntlength], 0,
609 memset(&self->nt[self->ntlength], 0,
610 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
610 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
611 }
611 }
612 return self->ntlength++;
612 return self->ntlength++;
613 }
613 }
614
614
/*
 * Insert a (full 20-byte binary) node -> rev mapping into the trie.
 *
 * Walks nibble by nibble; an occupied leaf is either overwritten
 * (same node) or split by pushing the old leaf one level down.
 * Returns 0 on success, -1 on allocation failure or if 40 levels are
 * exhausted without placing the entry.
 */
static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		/* empty slot: store the rev as a leaf, -(rev + 1) */
		if (v == 0) {
			n->children[k] = -rev - 1;
			return 0;
		}
		if (v < 0) {
			/* occupied leaf: replace if it is the same node
			   (or unreadable), otherwise split */
			const char *oldnode = index_node(self, -v - 1);
			int noff;

			if (!oldnode || !memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 1;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			/* push the displaced leaf down one level; the
			   loop retries this node at the same new level */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			/* interior node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
659
659
660 static int nt_init(indexObject *self)
660 static int nt_init(indexObject *self)
661 {
661 {
662 if (self->nt == NULL) {
662 if (self->nt == NULL) {
663 self->ntcapacity = self->raw_length < 4
663 self->ntcapacity = self->raw_length < 4
664 ? 4 : self->raw_length / 2;
664 ? 4 : self->raw_length / 2;
665 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
665 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
666 if (self->nt == NULL) {
666 if (self->nt == NULL) {
667 PyErr_NoMemory();
667 PyErr_NoMemory();
668 return -1;
668 return -1;
669 }
669 }
670 self->ntlength = 1;
670 self->ntlength = 1;
671 self->ntrev = (int)index_length(self) - 1;
671 self->ntrev = (int)index_length(self) - 1;
672 self->ntlookups = 1;
672 self->ntlookups = 1;
673 self->ntmisses = 0;
673 self->ntmisses = 0;
674 if (nt_insert(self, nullid, INT_MAX) == -1)
674 if (nt_insert(self, nullid, INT_MAX) == -1)
675 return -1;
675 return -1;
676 }
676 }
677 return 0;
677 return 0;
678 }
678 }
679
679
680 /*
680 /*
681 * Return values:
681 * Return values:
682 *
682 *
683 * -3: error (exception set)
683 * -3: error (exception set)
684 * -2: not found (no exception set)
684 * -2: not found (no exception set)
685 * rest: valid rev
685 * rest: valid rev
686 */
686 */
687 static int index_find_node(indexObject *self,
687 static int index_find_node(indexObject *self,
688 const char *node, Py_ssize_t nodelen)
688 const char *node, Py_ssize_t nodelen)
689 {
689 {
690 int rev;
690 int rev;
691
691
692 self->ntlookups++;
692 self->ntlookups++;
693 rev = nt_find(self, node, nodelen, 0);
693 rev = nt_find(self, node, nodelen, 0);
694 if (rev >= -1)
694 if (rev >= -1)
695 return rev;
695 return rev;
696
696
697 if (nt_init(self) == -1)
697 if (nt_init(self) == -1)
698 return -3;
698 return -3;
699
699
700 /*
700 /*
701 * For the first handful of lookups, we scan the entire index,
701 * For the first handful of lookups, we scan the entire index,
702 * and cache only the matching nodes. This optimizes for cases
702 * and cache only the matching nodes. This optimizes for cases
703 * like "hg tip", where only a few nodes are accessed.
703 * like "hg tip", where only a few nodes are accessed.
704 *
704 *
705 * After that, we cache every node we visit, using a single
705 * After that, we cache every node we visit, using a single
706 * scan amortized over multiple lookups. This gives the best
706 * scan amortized over multiple lookups. This gives the best
707 * bulk performance, e.g. for "hg log".
707 * bulk performance, e.g. for "hg log".
708 */
708 */
709 if (self->ntmisses++ < 4) {
709 if (self->ntmisses++ < 4) {
710 for (rev = self->ntrev - 1; rev >= 0; rev--) {
710 for (rev = self->ntrev - 1; rev >= 0; rev--) {
711 const char *n = index_node(self, rev);
711 const char *n = index_node(self, rev);
712 if (n == NULL)
712 if (n == NULL)
713 return -2;
713 return -2;
714 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
714 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
715 if (nt_insert(self, n, rev) == -1)
715 if (nt_insert(self, n, rev) == -1)
716 return -3;
716 return -3;
717 break;
717 break;
718 }
718 }
719 }
719 }
720 } else {
720 } else {
721 for (rev = self->ntrev - 1; rev >= 0; rev--) {
721 for (rev = self->ntrev - 1; rev >= 0; rev--) {
722 const char *n = index_node(self, rev);
722 const char *n = index_node(self, rev);
723 if (n == NULL) {
723 if (n == NULL) {
724 self->ntrev = rev + 1;
724 self->ntrev = rev + 1;
725 return -2;
725 return -2;
726 }
726 }
727 if (nt_insert(self, n, rev) == -1) {
727 if (nt_insert(self, n, rev) == -1) {
728 self->ntrev = rev + 1;
728 self->ntrev = rev + 1;
729 return -3;
729 return -3;
730 }
730 }
731 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
731 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
732 break;
732 break;
733 }
733 }
734 }
734 }
735 self->ntrev = rev;
735 self->ntrev = rev;
736 }
736 }
737
737
738 if (rev >= 0)
738 if (rev >= 0)
739 return rev;
739 return rev;
740 return -2;
740 return -2;
741 }
741 }
742
742
743 static PyObject *raise_revlog_error(void)
743 static PyObject *raise_revlog_error(void)
744 {
744 {
745 static PyObject *errclass;
745 static PyObject *errclass;
746 PyObject *mod = NULL, *errobj;
746 PyObject *mod = NULL, *errobj;
747
747
748 if (errclass == NULL) {
748 if (errclass == NULL) {
749 PyObject *dict;
749 PyObject *dict;
750
750
751 mod = PyImport_ImportModule("mercurial.error");
751 mod = PyImport_ImportModule("mercurial.error");
752 if (mod == NULL)
752 if (mod == NULL)
753 goto classfail;
753 goto classfail;
754
754
755 dict = PyModule_GetDict(mod);
755 dict = PyModule_GetDict(mod);
756 if (dict == NULL)
756 if (dict == NULL)
757 goto classfail;
757 goto classfail;
758
758
759 errclass = PyDict_GetItemString(dict, "RevlogError");
759 errclass = PyDict_GetItemString(dict, "RevlogError");
760 if (errclass == NULL) {
760 if (errclass == NULL) {
761 PyErr_SetString(PyExc_SystemError,
761 PyErr_SetString(PyExc_SystemError,
762 "could not find RevlogError");
762 "could not find RevlogError");
763 goto classfail;
763 goto classfail;
764 }
764 }
765 Py_INCREF(errclass);
765 Py_INCREF(errclass);
766 }
766 }
767
767
768 errobj = PyObject_CallFunction(errclass, NULL);
768 errobj = PyObject_CallFunction(errclass, NULL);
769 if (errobj == NULL)
769 if (errobj == NULL)
770 return NULL;
770 return NULL;
771 PyErr_SetObject(errclass, errobj);
771 PyErr_SetObject(errclass, errobj);
772 return errobj;
772 return errobj;
773
773
774 classfail:
774 classfail:
775 Py_XDECREF(mod);
775 Py_XDECREF(mod);
776 return NULL;
776 return NULL;
777 }
777 }
778
778
779 static PyObject *index_getitem(indexObject *self, PyObject *value)
779 static PyObject *index_getitem(indexObject *self, PyObject *value)
780 {
780 {
781 char *node;
781 char *node;
782 Py_ssize_t nodelen;
782 Py_ssize_t nodelen;
783 int rev;
783 int rev;
784
784
785 if (PyInt_Check(value))
785 if (PyInt_Check(value))
786 return index_get(self, PyInt_AS_LONG(value));
786 return index_get(self, PyInt_AS_LONG(value));
787
787
788 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
788 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
789 return NULL;
789 return NULL;
790 rev = index_find_node(self, node, nodelen);
790 rev = index_find_node(self, node, nodelen);
791 if (rev >= -1)
791 if (rev >= -1)
792 return PyInt_FromLong(rev);
792 return PyInt_FromLong(rev);
793 if (rev == -2)
793 if (rev == -2)
794 raise_revlog_error();
794 raise_revlog_error();
795 return NULL;
795 return NULL;
796 }
796 }
797
797
798 static int nt_partialmatch(indexObject *self, const char *node,
799 Py_ssize_t nodelen)
800 {
801 int rev;
802
803 if (nt_init(self) == -1)
804 return -3;
805
806 if (self->ntrev > 0) {
807 /* ensure that the radix tree is fully populated */
808 for (rev = self->ntrev - 1; rev >= 0; rev--) {
809 const char *n = index_node(self, rev);
810 if (n == NULL)
811 return -2;
812 if (nt_insert(self, n, rev) == -1)
813 return -3;
814 }
815 self->ntrev = rev;
816 }
817
818 return nt_find(self, node, nodelen, 1);
819 }
820
821 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
822 {
823 const char *fullnode;
824 int nodelen;
825 char *node;
826 int rev, i;
827
828 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
829 return NULL;
830
831 if (nodelen < 4) {
832 PyErr_SetString(PyExc_ValueError, "key too short");
833 return NULL;
834 }
835
836 if (nodelen > 40)
837 nodelen = 40;
838
839 for (i = 0; i < nodelen; i++)
840 hexdigit(node, i);
841 if (PyErr_Occurred()) {
842 /* input contains non-hex characters */
843 PyErr_Clear();
844 Py_RETURN_NONE;
845 }
846
847 rev = nt_partialmatch(self, node, nodelen);
848
849 switch (rev) {
850 case -4:
851 raise_revlog_error();
852 case -3:
853 return NULL;
854 case -2:
855 Py_RETURN_NONE;
856 case -1:
857 return PyString_FromStringAndSize(nullid, 20);
858 }
859
860 fullnode = index_node(self, rev);
861 if (fullnode == NULL) {
862 PyErr_Format(PyExc_IndexError,
863 "could not access rev %d", rev);
864 return NULL;
865 }
866 return PyString_FromStringAndSize(fullnode, 20);
867 }
868
798 static PyObject *index_m_get(indexObject *self, PyObject *args)
869 static PyObject *index_m_get(indexObject *self, PyObject *args)
799 {
870 {
800 char *node;
871 char *node;
801 int nodelen, rev;
872 int nodelen, rev;
802
873
803 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
874 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
804 return NULL;
875 return NULL;
805
876
806 rev = index_find_node(self, node, nodelen);
877 rev = index_find_node(self, node, nodelen);
807 if (rev == -3)
878 if (rev == -3)
808 return NULL;
879 return NULL;
809 if (rev == -2)
880 if (rev == -2)
810 Py_RETURN_NONE;
881 Py_RETURN_NONE;
811 return PyInt_FromLong(rev);
882 return PyInt_FromLong(rev);
812 }
883 }
813
884
814 static int index_contains(indexObject *self, PyObject *value)
885 static int index_contains(indexObject *self, PyObject *value)
815 {
886 {
816 char *node;
887 char *node;
817 Py_ssize_t nodelen;
888 Py_ssize_t nodelen;
818
889
819 if (PyInt_Check(value)) {
890 if (PyInt_Check(value)) {
820 long rev = PyInt_AS_LONG(value);
891 long rev = PyInt_AS_LONG(value);
821 return rev >= -1 && rev < index_length(self);
892 return rev >= -1 && rev < index_length(self);
822 }
893 }
823
894
824 if (!PyString_Check(value))
895 if (!PyString_Check(value))
825 return 0;
896 return 0;
826
897
827 node = PyString_AS_STRING(value);
898 node = PyString_AS_STRING(value);
828 nodelen = PyString_GET_SIZE(value);
899 nodelen = PyString_GET_SIZE(value);
829
900
830 switch (index_find_node(self, node, nodelen)) {
901 switch (index_find_node(self, node, nodelen)) {
831 case -3:
902 case -3:
832 return -1;
903 return -1;
833 case -2:
904 case -2:
834 return 0;
905 return 0;
835 default:
906 default:
836 return 1;
907 return 1;
837 }
908 }
838 }
909 }
839
910
840 /*
911 /*
841 * Invalidate any trie entries introduced by added revs.
912 * Invalidate any trie entries introduced by added revs.
842 */
913 */
843 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
914 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
844 {
915 {
845 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
916 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
846
917
847 for (i = start; i < len; i++) {
918 for (i = start; i < len; i++) {
848 PyObject *tuple = PyList_GET_ITEM(self->added, i);
919 PyObject *tuple = PyList_GET_ITEM(self->added, i);
849 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
920 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
850
921
851 nt_insert(self, PyString_AS_STRING(node), -1);
922 nt_insert(self, PyString_AS_STRING(node), -1);
852 }
923 }
853
924
854 if (start == 0) {
925 if (start == 0) {
855 Py_DECREF(self->added);
926 Py_DECREF(self->added);
856 self->added = NULL;
927 self->added = NULL;
857 }
928 }
858 }
929 }
859
930
860 /*
931 /*
861 * Delete a numeric range of revs, which must be at the end of the
932 * Delete a numeric range of revs, which must be at the end of the
862 * range, but exclude the sentinel nullid entry.
933 * range, but exclude the sentinel nullid entry.
863 */
934 */
864 static int index_slice_del(indexObject *self, PyObject *item)
935 static int index_slice_del(indexObject *self, PyObject *item)
865 {
936 {
866 Py_ssize_t start, stop, step, slicelength;
937 Py_ssize_t start, stop, step, slicelength;
867 Py_ssize_t length = index_length(self);
938 Py_ssize_t length = index_length(self);
868
939
869 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
940 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
870 &start, &stop, &step, &slicelength) < 0)
941 &start, &stop, &step, &slicelength) < 0)
871 return -1;
942 return -1;
872
943
873 if (slicelength <= 0)
944 if (slicelength <= 0)
874 return 0;
945 return 0;
875
946
876 if ((step < 0 && start < stop) || (step > 0 && start > stop))
947 if ((step < 0 && start < stop) || (step > 0 && start > stop))
877 stop = start;
948 stop = start;
878
949
879 if (step < 0) {
950 if (step < 0) {
880 stop = start + 1;
951 stop = start + 1;
881 start = stop + step*(slicelength - 1) - 1;
952 start = stop + step*(slicelength - 1) - 1;
882 step = -step;
953 step = -step;
883 }
954 }
884
955
885 if (step != 1) {
956 if (step != 1) {
886 PyErr_SetString(PyExc_ValueError,
957 PyErr_SetString(PyExc_ValueError,
887 "revlog index delete requires step size of 1");
958 "revlog index delete requires step size of 1");
888 return -1;
959 return -1;
889 }
960 }
890
961
891 if (stop != length - 1) {
962 if (stop != length - 1) {
892 PyErr_SetString(PyExc_IndexError,
963 PyErr_SetString(PyExc_IndexError,
893 "revlog index deletion indices are invalid");
964 "revlog index deletion indices are invalid");
894 return -1;
965 return -1;
895 }
966 }
896
967
897 if (start < self->length - 1) {
968 if (start < self->length - 1) {
898 if (self->nt) {
969 if (self->nt) {
899 Py_ssize_t i;
970 Py_ssize_t i;
900
971
901 for (i = start + 1; i < self->length - 1; i++) {
972 for (i = start + 1; i < self->length - 1; i++) {
902 const char *node = index_node(self, i);
973 const char *node = index_node(self, i);
903
974
904 if (node)
975 if (node)
905 nt_insert(self, node, -1);
976 nt_insert(self, node, -1);
906 }
977 }
907 if (self->added)
978 if (self->added)
908 nt_invalidate_added(self, 0);
979 nt_invalidate_added(self, 0);
909 if (self->ntrev > start)
980 if (self->ntrev > start)
910 self->ntrev = (int)start;
981 self->ntrev = (int)start;
911 }
982 }
912 self->length = start + 1;
983 self->length = start + 1;
913 return 0;
984 return 0;
914 }
985 }
915
986
916 if (self->nt) {
987 if (self->nt) {
917 nt_invalidate_added(self, start - self->length + 1);
988 nt_invalidate_added(self, start - self->length + 1);
918 if (self->ntrev > start)
989 if (self->ntrev > start)
919 self->ntrev = (int)start;
990 self->ntrev = (int)start;
920 }
991 }
921 return self->added
992 return self->added
922 ? PyList_SetSlice(self->added, start - self->length + 1,
993 ? PyList_SetSlice(self->added, start - self->length + 1,
923 PyList_GET_SIZE(self->added), NULL)
994 PyList_GET_SIZE(self->added), NULL)
924 : 0;
995 : 0;
925 }
996 }
926
997
927 /*
998 /*
928 * Supported ops:
999 * Supported ops:
929 *
1000 *
930 * slice deletion
1001 * slice deletion
931 * string assignment (extend node->rev mapping)
1002 * string assignment (extend node->rev mapping)
932 * string deletion (shrink node->rev mapping)
1003 * string deletion (shrink node->rev mapping)
933 */
1004 */
934 static int index_assign_subscript(indexObject *self, PyObject *item,
1005 static int index_assign_subscript(indexObject *self, PyObject *item,
935 PyObject *value)
1006 PyObject *value)
936 {
1007 {
937 char *node;
1008 char *node;
938 Py_ssize_t nodelen;
1009 Py_ssize_t nodelen;
939 long rev;
1010 long rev;
940
1011
941 if (PySlice_Check(item) && value == NULL)
1012 if (PySlice_Check(item) && value == NULL)
942 return index_slice_del(self, item);
1013 return index_slice_del(self, item);
943
1014
944 if (node_check(item, &node, &nodelen) == -1)
1015 if (node_check(item, &node, &nodelen) == -1)
945 return -1;
1016 return -1;
946
1017
947 if (value == NULL)
1018 if (value == NULL)
948 return self->nt ? nt_insert(self, node, -1) : 0;
1019 return self->nt ? nt_insert(self, node, -1) : 0;
949 rev = PyInt_AsLong(value);
1020 rev = PyInt_AsLong(value);
950 if (rev > INT_MAX || rev < 0) {
1021 if (rev > INT_MAX || rev < 0) {
951 if (!PyErr_Occurred())
1022 if (!PyErr_Occurred())
952 PyErr_SetString(PyExc_ValueError, "rev out of range");
1023 PyErr_SetString(PyExc_ValueError, "rev out of range");
953 return -1;
1024 return -1;
954 }
1025 }
955 return nt_insert(self, node, (int)rev);
1026 return nt_insert(self, node, (int)rev);
956 }
1027 }
957
1028
958 /*
1029 /*
959 * Find all RevlogNG entries in an index that has inline data. Update
1030 * Find all RevlogNG entries in an index that has inline data. Update
960 * the optional "offsets" table with those entries.
1031 * the optional "offsets" table with those entries.
961 */
1032 */
962 static long inline_scan(indexObject *self, const char **offsets)
1033 static long inline_scan(indexObject *self, const char **offsets)
963 {
1034 {
964 const char *data = PyString_AS_STRING(self->data);
1035 const char *data = PyString_AS_STRING(self->data);
965 const char *end = data + PyString_GET_SIZE(self->data);
1036 const char *end = data + PyString_GET_SIZE(self->data);
966 const long hdrsize = 64;
1037 const long hdrsize = 64;
967 long incr = hdrsize;
1038 long incr = hdrsize;
968 Py_ssize_t len = 0;
1039 Py_ssize_t len = 0;
969
1040
970 while (data + hdrsize <= end) {
1041 while (data + hdrsize <= end) {
971 uint32_t comp_len;
1042 uint32_t comp_len;
972 const char *old_data;
1043 const char *old_data;
973 /* 3rd element of header is length of compressed inline data */
1044 /* 3rd element of header is length of compressed inline data */
974 comp_len = getbe32(data + 8);
1045 comp_len = getbe32(data + 8);
975 incr = hdrsize + comp_len;
1046 incr = hdrsize + comp_len;
976 if (incr < hdrsize)
1047 if (incr < hdrsize)
977 break;
1048 break;
978 if (offsets)
1049 if (offsets)
979 offsets[len] = data;
1050 offsets[len] = data;
980 len++;
1051 len++;
981 old_data = data;
1052 old_data = data;
982 data += incr;
1053 data += incr;
983 if (data <= old_data)
1054 if (data <= old_data)
984 break;
1055 break;
985 }
1056 }
986
1057
987 if (data != end && data + hdrsize != end) {
1058 if (data != end && data + hdrsize != end) {
988 if (!PyErr_Occurred())
1059 if (!PyErr_Occurred())
989 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1060 PyErr_SetString(PyExc_ValueError, "corrupt index file");
990 return -1;
1061 return -1;
991 }
1062 }
992
1063
993 return len;
1064 return len;
994 }
1065 }
995
1066
996 static int index_init(indexObject *self, PyObject *args)
1067 static int index_init(indexObject *self, PyObject *args)
997 {
1068 {
998 PyObject *data_obj, *inlined_obj;
1069 PyObject *data_obj, *inlined_obj;
999 Py_ssize_t size;
1070 Py_ssize_t size;
1000
1071
1001 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1072 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1002 return -1;
1073 return -1;
1003 if (!PyString_Check(data_obj)) {
1074 if (!PyString_Check(data_obj)) {
1004 PyErr_SetString(PyExc_TypeError, "data is not a string");
1075 PyErr_SetString(PyExc_TypeError, "data is not a string");
1005 return -1;
1076 return -1;
1006 }
1077 }
1007 size = PyString_GET_SIZE(data_obj);
1078 size = PyString_GET_SIZE(data_obj);
1008
1079
1009 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1080 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1010 self->data = data_obj;
1081 self->data = data_obj;
1011 self->cache = NULL;
1082 self->cache = NULL;
1012
1083
1013 self->added = NULL;
1084 self->added = NULL;
1014 self->offsets = NULL;
1085 self->offsets = NULL;
1015 self->nt = NULL;
1086 self->nt = NULL;
1016 self->ntlength = self->ntcapacity = 0;
1087 self->ntlength = self->ntcapacity = 0;
1017 self->ntdepth = self->ntsplits = 0;
1088 self->ntdepth = self->ntsplits = 0;
1018 self->ntlookups = self->ntmisses = 0;
1089 self->ntlookups = self->ntmisses = 0;
1019 self->ntrev = -1;
1090 self->ntrev = -1;
1020 Py_INCREF(self->data);
1091 Py_INCREF(self->data);
1021
1092
1022 if (self->inlined) {
1093 if (self->inlined) {
1023 long len = inline_scan(self, NULL);
1094 long len = inline_scan(self, NULL);
1024 if (len == -1)
1095 if (len == -1)
1025 goto bail;
1096 goto bail;
1026 self->raw_length = len;
1097 self->raw_length = len;
1027 self->length = len + 1;
1098 self->length = len + 1;
1028 } else {
1099 } else {
1029 if (size % 64) {
1100 if (size % 64) {
1030 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1101 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1031 goto bail;
1102 goto bail;
1032 }
1103 }
1033 self->raw_length = size / 64;
1104 self->raw_length = size / 64;
1034 self->length = self->raw_length + 1;
1105 self->length = self->raw_length + 1;
1035 }
1106 }
1036
1107
1037 return 0;
1108 return 0;
1038 bail:
1109 bail:
1039 return -1;
1110 return -1;
1040 }
1111 }
1041
1112
1042 static PyObject *index_nodemap(indexObject *self)
1113 static PyObject *index_nodemap(indexObject *self)
1043 {
1114 {
1044 Py_INCREF(self);
1115 Py_INCREF(self);
1045 return (PyObject *)self;
1116 return (PyObject *)self;
1046 }
1117 }
1047
1118
1048 static void index_dealloc(indexObject *self)
1119 static void index_dealloc(indexObject *self)
1049 {
1120 {
1050 _index_clearcaches(self);
1121 _index_clearcaches(self);
1051 Py_DECREF(self->data);
1122 Py_DECREF(self->data);
1052 Py_XDECREF(self->added);
1123 Py_XDECREF(self->added);
1053 PyObject_Del(self);
1124 PyObject_Del(self);
1054 }
1125 }
1055
1126
1056 static PySequenceMethods index_sequence_methods = {
1127 static PySequenceMethods index_sequence_methods = {
1057 (lenfunc)index_length, /* sq_length */
1128 (lenfunc)index_length, /* sq_length */
1058 0, /* sq_concat */
1129 0, /* sq_concat */
1059 0, /* sq_repeat */
1130 0, /* sq_repeat */
1060 (ssizeargfunc)index_get, /* sq_item */
1131 (ssizeargfunc)index_get, /* sq_item */
1061 0, /* sq_slice */
1132 0, /* sq_slice */
1062 0, /* sq_ass_item */
1133 0, /* sq_ass_item */
1063 0, /* sq_ass_slice */
1134 0, /* sq_ass_slice */
1064 (objobjproc)index_contains, /* sq_contains */
1135 (objobjproc)index_contains, /* sq_contains */
1065 };
1136 };
1066
1137
1067 static PyMappingMethods index_mapping_methods = {
1138 static PyMappingMethods index_mapping_methods = {
1068 (lenfunc)index_length, /* mp_length */
1139 (lenfunc)index_length, /* mp_length */
1069 (binaryfunc)index_getitem, /* mp_subscript */
1140 (binaryfunc)index_getitem, /* mp_subscript */
1070 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1141 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1071 };
1142 };
1072
1143
1073 static PyMethodDef index_methods[] = {
1144 static PyMethodDef index_methods[] = {
1074 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1145 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1075 "clear the index caches"},
1146 "clear the index caches"},
1076 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1147 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1077 "get an index entry"},
1148 "get an index entry"},
1078 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1149 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1079 "insert an index entry"},
1150 "insert an index entry"},
1151 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1152 "match a potentially ambiguous node ID"},
1080 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1153 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1081 "stats for the index"},
1154 "stats for the index"},
1082 {NULL} /* Sentinel */
1155 {NULL} /* Sentinel */
1083 };
1156 };
1084
1157
1085 static PyGetSetDef index_getset[] = {
1158 static PyGetSetDef index_getset[] = {
1086 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1159 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1087 {NULL} /* Sentinel */
1160 {NULL} /* Sentinel */
1088 };
1161 };
1089
1162
1090 static PyTypeObject indexType = {
1163 static PyTypeObject indexType = {
1091 PyObject_HEAD_INIT(NULL)
1164 PyObject_HEAD_INIT(NULL)
1092 0, /* ob_size */
1165 0, /* ob_size */
1093 "parsers.index", /* tp_name */
1166 "parsers.index", /* tp_name */
1094 sizeof(indexObject), /* tp_basicsize */
1167 sizeof(indexObject), /* tp_basicsize */
1095 0, /* tp_itemsize */
1168 0, /* tp_itemsize */
1096 (destructor)index_dealloc, /* tp_dealloc */
1169 (destructor)index_dealloc, /* tp_dealloc */
1097 0, /* tp_print */
1170 0, /* tp_print */
1098 0, /* tp_getattr */
1171 0, /* tp_getattr */
1099 0, /* tp_setattr */
1172 0, /* tp_setattr */
1100 0, /* tp_compare */
1173 0, /* tp_compare */
1101 0, /* tp_repr */
1174 0, /* tp_repr */
1102 0, /* tp_as_number */
1175 0, /* tp_as_number */
1103 &index_sequence_methods, /* tp_as_sequence */
1176 &index_sequence_methods, /* tp_as_sequence */
1104 &index_mapping_methods, /* tp_as_mapping */
1177 &index_mapping_methods, /* tp_as_mapping */
1105 0, /* tp_hash */
1178 0, /* tp_hash */
1106 0, /* tp_call */
1179 0, /* tp_call */
1107 0, /* tp_str */
1180 0, /* tp_str */
1108 0, /* tp_getattro */
1181 0, /* tp_getattro */
1109 0, /* tp_setattro */
1182 0, /* tp_setattro */
1110 0, /* tp_as_buffer */
1183 0, /* tp_as_buffer */
1111 Py_TPFLAGS_DEFAULT, /* tp_flags */
1184 Py_TPFLAGS_DEFAULT, /* tp_flags */
1112 "revlog index", /* tp_doc */
1185 "revlog index", /* tp_doc */
1113 0, /* tp_traverse */
1186 0, /* tp_traverse */
1114 0, /* tp_clear */
1187 0, /* tp_clear */
1115 0, /* tp_richcompare */
1188 0, /* tp_richcompare */
1116 0, /* tp_weaklistoffset */
1189 0, /* tp_weaklistoffset */
1117 0, /* tp_iter */
1190 0, /* tp_iter */
1118 0, /* tp_iternext */
1191 0, /* tp_iternext */
1119 index_methods, /* tp_methods */
1192 index_methods, /* tp_methods */
1120 0, /* tp_members */
1193 0, /* tp_members */
1121 index_getset, /* tp_getset */
1194 index_getset, /* tp_getset */
1122 0, /* tp_base */
1195 0, /* tp_base */
1123 0, /* tp_dict */
1196 0, /* tp_dict */
1124 0, /* tp_descr_get */
1197 0, /* tp_descr_get */
1125 0, /* tp_descr_set */
1198 0, /* tp_descr_set */
1126 0, /* tp_dictoffset */
1199 0, /* tp_dictoffset */
1127 (initproc)index_init, /* tp_init */
1200 (initproc)index_init, /* tp_init */
1128 0, /* tp_alloc */
1201 0, /* tp_alloc */
1129 };
1202 };
1130
1203
1131 /*
1204 /*
1132 * returns a tuple of the form (index, index, cache) with elements as
1205 * returns a tuple of the form (index, index, cache) with elements as
1133 * follows:
1206 * follows:
1134 *
1207 *
1135 * index: an index object that lazily parses RevlogNG records
1208 * index: an index object that lazily parses RevlogNG records
1136 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1209 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1137 *
1210 *
1138 * added complications are for backwards compatibility
1211 * added complications are for backwards compatibility
1139 */
1212 */
1140 static PyObject *parse_index2(PyObject *self, PyObject *args)
1213 static PyObject *parse_index2(PyObject *self, PyObject *args)
1141 {
1214 {
1142 PyObject *tuple = NULL, *cache = NULL;
1215 PyObject *tuple = NULL, *cache = NULL;
1143 indexObject *idx;
1216 indexObject *idx;
1144 int ret;
1217 int ret;
1145
1218
1146 idx = PyObject_New(indexObject, &indexType);
1219 idx = PyObject_New(indexObject, &indexType);
1147 if (idx == NULL)
1220 if (idx == NULL)
1148 goto bail;
1221 goto bail;
1149
1222
1150 ret = index_init(idx, args);
1223 ret = index_init(idx, args);
1151 if (ret == -1)
1224 if (ret == -1)
1152 goto bail;
1225 goto bail;
1153
1226
1154 if (idx->inlined) {
1227 if (idx->inlined) {
1155 cache = Py_BuildValue("iO", 0, idx->data);
1228 cache = Py_BuildValue("iO", 0, idx->data);
1156 if (cache == NULL)
1229 if (cache == NULL)
1157 goto bail;
1230 goto bail;
1158 } else {
1231 } else {
1159 cache = Py_None;
1232 cache = Py_None;
1160 Py_INCREF(cache);
1233 Py_INCREF(cache);
1161 }
1234 }
1162
1235
1163 tuple = Py_BuildValue("NN", idx, cache);
1236 tuple = Py_BuildValue("NN", idx, cache);
1164 if (!tuple)
1237 if (!tuple)
1165 goto bail;
1238 goto bail;
1166 return tuple;
1239 return tuple;
1167
1240
1168 bail:
1241 bail:
1169 Py_XDECREF(idx);
1242 Py_XDECREF(idx);
1170 Py_XDECREF(cache);
1243 Py_XDECREF(cache);
1171 Py_XDECREF(tuple);
1244 Py_XDECREF(tuple);
1172 return NULL;
1245 return NULL;
1173 }
1246 }
1174
1247
1175 static char parsers_doc[] = "Efficient content parsing.";
1248 static char parsers_doc[] = "Efficient content parsing.";
1176
1249
1177 static PyMethodDef methods[] = {
1250 static PyMethodDef methods[] = {
1178 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1251 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1179 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1252 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1180 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1253 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1181 {NULL, NULL}
1254 {NULL, NULL}
1182 };
1255 };
1183
1256
1184 static void module_init(PyObject *mod)
1257 static void module_init(PyObject *mod)
1185 {
1258 {
1186 indexType.tp_new = PyType_GenericNew;
1259 indexType.tp_new = PyType_GenericNew;
1187 if (PyType_Ready(&indexType) < 0)
1260 if (PyType_Ready(&indexType) < 0)
1188 return;
1261 return;
1189 Py_INCREF(&indexType);
1262 Py_INCREF(&indexType);
1190
1263
1191 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1264 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1192
1265
1193 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1266 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1194 -1, -1, -1, -1, nullid, 20);
1267 -1, -1, -1, -1, nullid, 20);
1195 if (nullentry)
1268 if (nullentry)
1196 PyObject_GC_UnTrack(nullentry);
1269 PyObject_GC_UnTrack(nullentry);
1197 }
1270 }
1198
1271
1199 #ifdef IS_PY3K
1272 #ifdef IS_PY3K
1200 static struct PyModuleDef parsers_module = {
1273 static struct PyModuleDef parsers_module = {
1201 PyModuleDef_HEAD_INIT,
1274 PyModuleDef_HEAD_INIT,
1202 "parsers",
1275 "parsers",
1203 parsers_doc,
1276 parsers_doc,
1204 -1,
1277 -1,
1205 methods
1278 methods
1206 };
1279 };
1207
1280
1208 PyMODINIT_FUNC PyInit_parsers(void)
1281 PyMODINIT_FUNC PyInit_parsers(void)
1209 {
1282 {
1210 PyObject *mod = PyModule_Create(&parsers_module);
1283 PyObject *mod = PyModule_Create(&parsers_module);
1211 module_init(mod);
1284 module_init(mod);
1212 return mod;
1285 return mod;
1213 }
1286 }
1214 #else
1287 #else
1215 PyMODINIT_FUNC initparsers(void)
1288 PyMODINIT_FUNC initparsers(void)
1216 {
1289 {
1217 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1290 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1218 module_init(mod);
1291 module_init(mod);
1219 }
1292 }
1220 #endif
1293 #endif
@@ -1,1308 +1,1317 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, dagutil
17 import ancestor, mdiff, parsers, error, util, dagutil
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45
45
46 def getoffset(q):
46 def getoffset(q):
47 return int(q >> 16)
47 return int(q >> 16)
48
48
49 def gettype(q):
49 def gettype(q):
50 return int(q & 0xFFFF)
50 return int(q & 0xFFFF)
51
51
52 def offset_type(offset, type):
52 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
53 return long(long(offset) << 16 | type)
54
54
55 nullhash = _sha(nullid)
55 nullhash = _sha(nullid)
56
56
57 def hash(text, p1, p2):
57 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
58 """generate a hash from the given text and its parent hashes
59
59
60 This hash combines both the current file contents and its history
60 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
61 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
62 content in the revision graph.
63 """
63 """
64 # As of now, if one of the parent node is null, p2 is null
64 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
65 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
66 # deep copy of a hash is faster than creating one
67 s = nullhash.copy()
67 s = nullhash.copy()
68 s.update(p1)
68 s.update(p1)
69 else:
69 else:
70 # none of the parent nodes are nullid
70 # none of the parent nodes are nullid
71 l = [p1, p2]
71 l = [p1, p2]
72 l.sort()
72 l.sort()
73 s = _sha(l[0])
73 s = _sha(l[0])
74 s.update(l[1])
74 s.update(l[1])
75 s.update(text)
75 s.update(text)
76 return s.digest()
76 return s.digest()
77
77
78 def compress(text):
78 def compress(text):
79 """ generate a possibly-compressed representation of text """
79 """ generate a possibly-compressed representation of text """
80 if not text:
80 if not text:
81 return ("", text)
81 return ("", text)
82 l = len(text)
82 l = len(text)
83 bin = None
83 bin = None
84 if l < 44:
84 if l < 44:
85 pass
85 pass
86 elif l > 1000000:
86 elif l > 1000000:
87 # zlib makes an internal copy, thus doubling memory usage for
87 # zlib makes an internal copy, thus doubling memory usage for
88 # large files, so lets do this in pieces
88 # large files, so lets do this in pieces
89 z = zlib.compressobj()
89 z = zlib.compressobj()
90 p = []
90 p = []
91 pos = 0
91 pos = 0
92 while pos < l:
92 while pos < l:
93 pos2 = pos + 2**20
93 pos2 = pos + 2**20
94 p.append(z.compress(text[pos:pos2]))
94 p.append(z.compress(text[pos:pos2]))
95 pos = pos2
95 pos = pos2
96 p.append(z.flush())
96 p.append(z.flush())
97 if sum(map(len, p)) < l:
97 if sum(map(len, p)) < l:
98 bin = "".join(p)
98 bin = "".join(p)
99 else:
99 else:
100 bin = _compress(text)
100 bin = _compress(text)
101 if bin is None or len(bin) > l:
101 if bin is None or len(bin) > l:
102 if text[0] == '\0':
102 if text[0] == '\0':
103 return ("", text)
103 return ("", text)
104 return ('u', text)
104 return ('u', text)
105 return ("", bin)
105 return ("", bin)
106
106
107 def decompress(bin):
107 def decompress(bin):
108 """ decompress the given input """
108 """ decompress the given input """
109 if not bin:
109 if not bin:
110 return bin
110 return bin
111 t = bin[0]
111 t = bin[0]
112 if t == '\0':
112 if t == '\0':
113 return bin
113 return bin
114 if t == 'x':
114 if t == 'x':
115 return _decompress(bin)
115 return _decompress(bin)
116 if t == 'u':
116 if t == 'u':
117 return bin[1:]
117 return bin[1:]
118 raise RevlogError(_("unknown compression type %r") % t)
118 raise RevlogError(_("unknown compression type %r") % t)
119
119
120 indexformatv0 = ">4l20s20s20s"
120 indexformatv0 = ">4l20s20s20s"
121 v0shaoffset = 56
121 v0shaoffset = 56
122
122
123 class revlogoldio(object):
123 class revlogoldio(object):
124 def __init__(self):
124 def __init__(self):
125 self.size = struct.calcsize(indexformatv0)
125 self.size = struct.calcsize(indexformatv0)
126
126
127 def parseindex(self, data, inline):
127 def parseindex(self, data, inline):
128 s = self.size
128 s = self.size
129 index = []
129 index = []
130 nodemap = {nullid: nullrev}
130 nodemap = {nullid: nullrev}
131 n = off = 0
131 n = off = 0
132 l = len(data)
132 l = len(data)
133 while off + s <= l:
133 while off + s <= l:
134 cur = data[off:off + s]
134 cur = data[off:off + s]
135 off += s
135 off += s
136 e = _unpack(indexformatv0, cur)
136 e = _unpack(indexformatv0, cur)
137 # transform to revlogv1 format
137 # transform to revlogv1 format
138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 index.append(e2)
140 index.append(e2)
141 nodemap[e[6]] = n
141 nodemap[e[6]] = n
142 n += 1
142 n += 1
143
143
144 # add the magic null revision at -1
144 # add the magic null revision at -1
145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146
146
147 return index, nodemap, None
147 return index, nodemap, None
148
148
149 def packentry(self, entry, node, version, rev):
149 def packentry(self, entry, node, version, rev):
150 if gettype(entry[0]):
150 if gettype(entry[0]):
151 raise RevlogError(_("index entry flags need RevlogNG"))
151 raise RevlogError(_("index entry flags need RevlogNG"))
152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 node(entry[5]), node(entry[6]), entry[7])
153 node(entry[5]), node(entry[6]), entry[7])
154 return _pack(indexformatv0, *e2)
154 return _pack(indexformatv0, *e2)
155
155
156 # index ng:
156 # index ng:
157 # 6 bytes: offset
157 # 6 bytes: offset
158 # 2 bytes: flags
158 # 2 bytes: flags
159 # 4 bytes: compressed length
159 # 4 bytes: compressed length
160 # 4 bytes: uncompressed length
160 # 4 bytes: uncompressed length
161 # 4 bytes: base rev
161 # 4 bytes: base rev
162 # 4 bytes: link rev
162 # 4 bytes: link rev
163 # 4 bytes: parent 1 rev
163 # 4 bytes: parent 1 rev
164 # 4 bytes: parent 2 rev
164 # 4 bytes: parent 2 rev
165 # 32 bytes: nodeid
165 # 32 bytes: nodeid
166 indexformatng = ">Qiiiiii20s12x"
166 indexformatng = ">Qiiiiii20s12x"
167 ngshaoffset = 32
167 ngshaoffset = 32
168 versionformat = ">I"
168 versionformat = ">I"
169
169
170 class revlogio(object):
170 class revlogio(object):
171 def __init__(self):
171 def __init__(self):
172 self.size = struct.calcsize(indexformatng)
172 self.size = struct.calcsize(indexformatng)
173
173
174 def parseindex(self, data, inline):
174 def parseindex(self, data, inline):
175 # call the C implementation to parse the index data
175 # call the C implementation to parse the index data
176 index, cache = parsers.parse_index2(data, inline)
176 index, cache = parsers.parse_index2(data, inline)
177 return index, getattr(index, 'nodemap', None), cache
177 return index, getattr(index, 'nodemap', None), cache
178
178
179 def packentry(self, entry, node, version, rev):
179 def packentry(self, entry, node, version, rev):
180 p = _pack(indexformatng, *entry)
180 p = _pack(indexformatng, *entry)
181 if rev == 0:
181 if rev == 0:
182 p = _pack(versionformat, version) + p[4:]
182 p = _pack(versionformat, version) + p[4:]
183 return p
183 return p
184
184
185 class revlog(object):
185 class revlog(object):
186 """
186 """
187 the underlying revision storage object
187 the underlying revision storage object
188
188
189 A revlog consists of two parts, an index and the revision data.
189 A revlog consists of two parts, an index and the revision data.
190
190
191 The index is a file with a fixed record size containing
191 The index is a file with a fixed record size containing
192 information on each revision, including its nodeid (hash), the
192 information on each revision, including its nodeid (hash), the
193 nodeids of its parents, the position and offset of its data within
193 nodeids of its parents, the position and offset of its data within
194 the data file, and the revision it's based on. Finally, each entry
194 the data file, and the revision it's based on. Finally, each entry
195 contains a linkrev entry that can serve as a pointer to external
195 contains a linkrev entry that can serve as a pointer to external
196 data.
196 data.
197
197
198 The revision data itself is a linear collection of data chunks.
198 The revision data itself is a linear collection of data chunks.
199 Each chunk represents a revision and is usually represented as a
199 Each chunk represents a revision and is usually represented as a
200 delta against the previous chunk. To bound lookup time, runs of
200 delta against the previous chunk. To bound lookup time, runs of
201 deltas are limited to about 2 times the length of the original
201 deltas are limited to about 2 times the length of the original
202 version data. This makes retrieval of a version proportional to
202 version data. This makes retrieval of a version proportional to
203 its size, or O(1) relative to the number of revisions.
203 its size, or O(1) relative to the number of revisions.
204
204
205 Both pieces of the revlog are written to in an append-only
205 Both pieces of the revlog are written to in an append-only
206 fashion, which means we never need to rewrite a file to insert or
206 fashion, which means we never need to rewrite a file to insert or
207 remove data, and can use some simple techniques to avoid the need
207 remove data, and can use some simple techniques to avoid the need
208 for locking while reading.
208 for locking while reading.
209 """
209 """
210 def __init__(self, opener, indexfile):
210 def __init__(self, opener, indexfile):
211 """
211 """
212 create a revlog object
212 create a revlog object
213
213
214 opener is a function that abstracts the file opening operation
214 opener is a function that abstracts the file opening operation
215 and can be used to implement COW semantics or the like.
215 and can be used to implement COW semantics or the like.
216 """
216 """
217 self.indexfile = indexfile
217 self.indexfile = indexfile
218 self.datafile = indexfile[:-2] + ".d"
218 self.datafile = indexfile[:-2] + ".d"
219 self.opener = opener
219 self.opener = opener
220 self._cache = None
220 self._cache = None
221 self._basecache = (0, 0)
221 self._basecache = (0, 0)
222 self._chunkcache = (0, '')
222 self._chunkcache = (0, '')
223 self.index = []
223 self.index = []
224 self._pcache = {}
224 self._pcache = {}
225 self._nodecache = {nullid: nullrev}
225 self._nodecache = {nullid: nullrev}
226 self._nodepos = None
226 self._nodepos = None
227
227
228 v = REVLOG_DEFAULT_VERSION
228 v = REVLOG_DEFAULT_VERSION
229 opts = getattr(opener, 'options', None)
229 opts = getattr(opener, 'options', None)
230 if opts is not None:
230 if opts is not None:
231 if 'revlogv1' in opts:
231 if 'revlogv1' in opts:
232 if 'generaldelta' in opts:
232 if 'generaldelta' in opts:
233 v |= REVLOGGENERALDELTA
233 v |= REVLOGGENERALDELTA
234 else:
234 else:
235 v = 0
235 v = 0
236
236
237 i = ''
237 i = ''
238 self._initempty = True
238 self._initempty = True
239 try:
239 try:
240 f = self.opener(self.indexfile)
240 f = self.opener(self.indexfile)
241 i = f.read()
241 i = f.read()
242 f.close()
242 f.close()
243 if len(i) > 0:
243 if len(i) > 0:
244 v = struct.unpack(versionformat, i[:4])[0]
244 v = struct.unpack(versionformat, i[:4])[0]
245 self._initempty = False
245 self._initempty = False
246 except IOError, inst:
246 except IOError, inst:
247 if inst.errno != errno.ENOENT:
247 if inst.errno != errno.ENOENT:
248 raise
248 raise
249
249
250 self.version = v
250 self.version = v
251 self._inline = v & REVLOGNGINLINEDATA
251 self._inline = v & REVLOGNGINLINEDATA
252 self._generaldelta = v & REVLOGGENERALDELTA
252 self._generaldelta = v & REVLOGGENERALDELTA
253 flags = v & ~0xFFFF
253 flags = v & ~0xFFFF
254 fmt = v & 0xFFFF
254 fmt = v & 0xFFFF
255 if fmt == REVLOGV0 and flags:
255 if fmt == REVLOGV0 and flags:
256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 % (self.indexfile, flags >> 16))
257 % (self.indexfile, flags >> 16))
258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 % (self.indexfile, flags >> 16))
260 % (self.indexfile, flags >> 16))
261 elif fmt > REVLOGNG:
261 elif fmt > REVLOGNG:
262 raise RevlogError(_("index %s unknown format %d")
262 raise RevlogError(_("index %s unknown format %d")
263 % (self.indexfile, fmt))
263 % (self.indexfile, fmt))
264
264
265 self._io = revlogio()
265 self._io = revlogio()
266 if self.version == REVLOGV0:
266 if self.version == REVLOGV0:
267 self._io = revlogoldio()
267 self._io = revlogoldio()
268 try:
268 try:
269 d = self._io.parseindex(i, self._inline)
269 d = self._io.parseindex(i, self._inline)
270 except (ValueError, IndexError):
270 except (ValueError, IndexError):
271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 self.index, nodemap, self._chunkcache = d
272 self.index, nodemap, self._chunkcache = d
273 if nodemap is not None:
273 if nodemap is not None:
274 self.nodemap = self._nodecache = nodemap
274 self.nodemap = self._nodecache = nodemap
275 if not self._chunkcache:
275 if not self._chunkcache:
276 self._chunkclear()
276 self._chunkclear()
277
277
278 def tip(self):
278 def tip(self):
279 return self.node(len(self.index) - 2)
279 return self.node(len(self.index) - 2)
280 def __len__(self):
280 def __len__(self):
281 return len(self.index) - 1
281 return len(self.index) - 1
282 def __iter__(self):
282 def __iter__(self):
283 for i in xrange(len(self)):
283 for i in xrange(len(self)):
284 yield i
284 yield i
285
285
286 @util.propertycache
286 @util.propertycache
287 def nodemap(self):
287 def nodemap(self):
288 self.rev(self.node(0))
288 self.rev(self.node(0))
289 return self._nodecache
289 return self._nodecache
290
290
291 def hasnode(self, node):
291 def hasnode(self, node):
292 try:
292 try:
293 self.rev(node)
293 self.rev(node)
294 return True
294 return True
295 except KeyError:
295 except KeyError:
296 return False
296 return False
297
297
298 def clearcaches(self):
298 def clearcaches(self):
299 try:
299 try:
300 self._nodecache.clearcaches()
300 self._nodecache.clearcaches()
301 except AttributeError:
301 except AttributeError:
302 self._nodecache = {nullid: nullrev}
302 self._nodecache = {nullid: nullrev}
303 self._nodepos = None
303 self._nodepos = None
304
304
305 def rev(self, node):
305 def rev(self, node):
306 try:
306 try:
307 return self._nodecache[node]
307 return self._nodecache[node]
308 except RevlogError:
308 except RevlogError:
309 # parsers.c radix tree lookup failed
309 # parsers.c radix tree lookup failed
310 raise LookupError(node, self.indexfile, _('no node'))
310 raise LookupError(node, self.indexfile, _('no node'))
311 except KeyError:
311 except KeyError:
312 # pure python cache lookup failed
312 # pure python cache lookup failed
313 n = self._nodecache
313 n = self._nodecache
314 i = self.index
314 i = self.index
315 p = self._nodepos
315 p = self._nodepos
316 if p is None:
316 if p is None:
317 p = len(i) - 2
317 p = len(i) - 2
318 for r in xrange(p, -1, -1):
318 for r in xrange(p, -1, -1):
319 v = i[r][7]
319 v = i[r][7]
320 n[v] = r
320 n[v] = r
321 if v == node:
321 if v == node:
322 self._nodepos = r - 1
322 self._nodepos = r - 1
323 return r
323 return r
324 raise LookupError(node, self.indexfile, _('no node'))
324 raise LookupError(node, self.indexfile, _('no node'))
325
325
326 def node(self, rev):
326 def node(self, rev):
327 return self.index[rev][7]
327 return self.index[rev][7]
328 def linkrev(self, rev):
328 def linkrev(self, rev):
329 return self.index[rev][4]
329 return self.index[rev][4]
330 def parents(self, node):
330 def parents(self, node):
331 i = self.index
331 i = self.index
332 d = i[self.rev(node)]
332 d = i[self.rev(node)]
333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
334 def parentrevs(self, rev):
334 def parentrevs(self, rev):
335 return self.index[rev][5:7]
335 return self.index[rev][5:7]
336 def start(self, rev):
336 def start(self, rev):
337 return int(self.index[rev][0] >> 16)
337 return int(self.index[rev][0] >> 16)
338 def end(self, rev):
338 def end(self, rev):
339 return self.start(rev) + self.length(rev)
339 return self.start(rev) + self.length(rev)
340 def length(self, rev):
340 def length(self, rev):
341 return self.index[rev][1]
341 return self.index[rev][1]
342 def chainbase(self, rev):
342 def chainbase(self, rev):
343 index = self.index
343 index = self.index
344 base = index[rev][3]
344 base = index[rev][3]
345 while base != rev:
345 while base != rev:
346 rev = base
346 rev = base
347 base = index[rev][3]
347 base = index[rev][3]
348 return base
348 return base
349 def flags(self, rev):
349 def flags(self, rev):
350 return self.index[rev][0] & 0xFFFF
350 return self.index[rev][0] & 0xFFFF
351 def rawsize(self, rev):
351 def rawsize(self, rev):
352 """return the length of the uncompressed text for a given revision"""
352 """return the length of the uncompressed text for a given revision"""
353 l = self.index[rev][2]
353 l = self.index[rev][2]
354 if l >= 0:
354 if l >= 0:
355 return l
355 return l
356
356
357 t = self.revision(self.node(rev))
357 t = self.revision(self.node(rev))
358 return len(t)
358 return len(t)
359 size = rawsize
359 size = rawsize
360
360
361 def reachable(self, node, stop=None):
361 def reachable(self, node, stop=None):
362 """return the set of all nodes ancestral to a given node, including
362 """return the set of all nodes ancestral to a given node, including
363 the node itself, stopping when stop is matched"""
363 the node itself, stopping when stop is matched"""
364 reachable = set((node,))
364 reachable = set((node,))
365 visit = [node]
365 visit = [node]
366 if stop:
366 if stop:
367 stopn = self.rev(stop)
367 stopn = self.rev(stop)
368 else:
368 else:
369 stopn = 0
369 stopn = 0
370 while visit:
370 while visit:
371 n = visit.pop(0)
371 n = visit.pop(0)
372 if n == stop:
372 if n == stop:
373 continue
373 continue
374 if n == nullid:
374 if n == nullid:
375 continue
375 continue
376 for p in self.parents(n):
376 for p in self.parents(n):
377 if self.rev(p) < stopn:
377 if self.rev(p) < stopn:
378 continue
378 continue
379 if p not in reachable:
379 if p not in reachable:
380 reachable.add(p)
380 reachable.add(p)
381 visit.append(p)
381 visit.append(p)
382 return reachable
382 return reachable
383
383
384 def ancestors(self, *revs):
384 def ancestors(self, *revs):
385 """Generate the ancestors of 'revs' in reverse topological order.
385 """Generate the ancestors of 'revs' in reverse topological order.
386
386
387 Yield a sequence of revision numbers starting with the parents
387 Yield a sequence of revision numbers starting with the parents
388 of each revision in revs, i.e., each revision is *not* considered
388 of each revision in revs, i.e., each revision is *not* considered
389 an ancestor of itself. Results are in breadth-first order:
389 an ancestor of itself. Results are in breadth-first order:
390 parents of each rev in revs, then parents of those, etc. Result
390 parents of each rev in revs, then parents of those, etc. Result
391 does not include the null revision."""
391 does not include the null revision."""
392 visit = list(revs)
392 visit = list(revs)
393 seen = set([nullrev])
393 seen = set([nullrev])
394 while visit:
394 while visit:
395 for parent in self.parentrevs(visit.pop(0)):
395 for parent in self.parentrevs(visit.pop(0)):
396 if parent not in seen:
396 if parent not in seen:
397 visit.append(parent)
397 visit.append(parent)
398 seen.add(parent)
398 seen.add(parent)
399 yield parent
399 yield parent
400
400
401 def descendants(self, *revs):
401 def descendants(self, *revs):
402 """Generate the descendants of 'revs' in revision order.
402 """Generate the descendants of 'revs' in revision order.
403
403
404 Yield a sequence of revision numbers starting with a child of
404 Yield a sequence of revision numbers starting with a child of
405 some rev in revs, i.e., each revision is *not* considered a
405 some rev in revs, i.e., each revision is *not* considered a
406 descendant of itself. Results are ordered by revision number (a
406 descendant of itself. Results are ordered by revision number (a
407 topological sort)."""
407 topological sort)."""
408 first = min(revs)
408 first = min(revs)
409 if first == nullrev:
409 if first == nullrev:
410 for i in self:
410 for i in self:
411 yield i
411 yield i
412 return
412 return
413
413
414 seen = set(revs)
414 seen = set(revs)
415 for i in xrange(first + 1, len(self)):
415 for i in xrange(first + 1, len(self)):
416 for x in self.parentrevs(i):
416 for x in self.parentrevs(i):
417 if x != nullrev and x in seen:
417 if x != nullrev and x in seen:
418 seen.add(i)
418 seen.add(i)
419 yield i
419 yield i
420 break
420 break
421
421
422 def findcommonmissing(self, common=None, heads=None):
422 def findcommonmissing(self, common=None, heads=None):
423 """Return a tuple of the ancestors of common and the ancestors of heads
423 """Return a tuple of the ancestors of common and the ancestors of heads
424 that are not ancestors of common. In revset terminology, we return the
424 that are not ancestors of common. In revset terminology, we return the
425 tuple:
425 tuple:
426
426
427 ::common, (::heads) - (::common)
427 ::common, (::heads) - (::common)
428
428
429 The list is sorted by revision number, meaning it is
429 The list is sorted by revision number, meaning it is
430 topologically sorted.
430 topologically sorted.
431
431
432 'heads' and 'common' are both lists of node IDs. If heads is
432 'heads' and 'common' are both lists of node IDs. If heads is
433 not supplied, uses all of the revlog's heads. If common is not
433 not supplied, uses all of the revlog's heads. If common is not
434 supplied, uses nullid."""
434 supplied, uses nullid."""
435 if common is None:
435 if common is None:
436 common = [nullid]
436 common = [nullid]
437 if heads is None:
437 if heads is None:
438 heads = self.heads()
438 heads = self.heads()
439
439
440 common = [self.rev(n) for n in common]
440 common = [self.rev(n) for n in common]
441 heads = [self.rev(n) for n in heads]
441 heads = [self.rev(n) for n in heads]
442
442
443 # we want the ancestors, but inclusive
443 # we want the ancestors, but inclusive
444 has = set(self.ancestors(*common))
444 has = set(self.ancestors(*common))
445 has.add(nullrev)
445 has.add(nullrev)
446 has.update(common)
446 has.update(common)
447
447
448 # take all ancestors from heads that aren't in has
448 # take all ancestors from heads that aren't in has
449 missing = set()
449 missing = set()
450 visit = [r for r in heads if r not in has]
450 visit = [r for r in heads if r not in has]
451 while visit:
451 while visit:
452 r = visit.pop(0)
452 r = visit.pop(0)
453 if r in missing:
453 if r in missing:
454 continue
454 continue
455 else:
455 else:
456 missing.add(r)
456 missing.add(r)
457 for p in self.parentrevs(r):
457 for p in self.parentrevs(r):
458 if p not in has:
458 if p not in has:
459 visit.append(p)
459 visit.append(p)
460 missing = list(missing)
460 missing = list(missing)
461 missing.sort()
461 missing.sort()
462 return has, [self.node(r) for r in missing]
462 return has, [self.node(r) for r in missing]
463
463
464 def findmissing(self, common=None, heads=None):
464 def findmissing(self, common=None, heads=None):
465 """Return the ancestors of heads that are not ancestors of common.
465 """Return the ancestors of heads that are not ancestors of common.
466
466
467 More specifically, return a list of nodes N such that every N
467 More specifically, return a list of nodes N such that every N
468 satisfies the following constraints:
468 satisfies the following constraints:
469
469
470 1. N is an ancestor of some node in 'heads'
470 1. N is an ancestor of some node in 'heads'
471 2. N is not an ancestor of any node in 'common'
471 2. N is not an ancestor of any node in 'common'
472
472
473 The list is sorted by revision number, meaning it is
473 The list is sorted by revision number, meaning it is
474 topologically sorted.
474 topologically sorted.
475
475
476 'heads' and 'common' are both lists of node IDs. If heads is
476 'heads' and 'common' are both lists of node IDs. If heads is
477 not supplied, uses all of the revlog's heads. If common is not
477 not supplied, uses all of the revlog's heads. If common is not
478 supplied, uses nullid."""
478 supplied, uses nullid."""
479 _common, missing = self.findcommonmissing(common, heads)
479 _common, missing = self.findcommonmissing(common, heads)
480 return missing
480 return missing
481
481
482 def nodesbetween(self, roots=None, heads=None):
482 def nodesbetween(self, roots=None, heads=None):
483 """Return a topological path from 'roots' to 'heads'.
483 """Return a topological path from 'roots' to 'heads'.
484
484
485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
486 topologically sorted list of all nodes N that satisfy both of
486 topologically sorted list of all nodes N that satisfy both of
487 these constraints:
487 these constraints:
488
488
489 1. N is a descendant of some node in 'roots'
489 1. N is a descendant of some node in 'roots'
490 2. N is an ancestor of some node in 'heads'
490 2. N is an ancestor of some node in 'heads'
491
491
492 Every node is considered to be both a descendant and an ancestor
492 Every node is considered to be both a descendant and an ancestor
493 of itself, so every reachable node in 'roots' and 'heads' will be
493 of itself, so every reachable node in 'roots' and 'heads' will be
494 included in 'nodes'.
494 included in 'nodes'.
495
495
496 'outroots' is the list of reachable nodes in 'roots', i.e., the
496 'outroots' is the list of reachable nodes in 'roots', i.e., the
497 subset of 'roots' that is returned in 'nodes'. Likewise,
497 subset of 'roots' that is returned in 'nodes'. Likewise,
498 'outheads' is the subset of 'heads' that is also in 'nodes'.
498 'outheads' is the subset of 'heads' that is also in 'nodes'.
499
499
500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
501 unspecified, uses nullid as the only root. If 'heads' is
501 unspecified, uses nullid as the only root. If 'heads' is
502 unspecified, uses list of all of the revlog's heads."""
502 unspecified, uses list of all of the revlog's heads."""
503 nonodes = ([], [], [])
503 nonodes = ([], [], [])
504 if roots is not None:
504 if roots is not None:
505 roots = list(roots)
505 roots = list(roots)
506 if not roots:
506 if not roots:
507 return nonodes
507 return nonodes
508 lowestrev = min([self.rev(n) for n in roots])
508 lowestrev = min([self.rev(n) for n in roots])
509 else:
509 else:
510 roots = [nullid] # Everybody's a descendant of nullid
510 roots = [nullid] # Everybody's a descendant of nullid
511 lowestrev = nullrev
511 lowestrev = nullrev
512 if (lowestrev == nullrev) and (heads is None):
512 if (lowestrev == nullrev) and (heads is None):
513 # We want _all_ the nodes!
513 # We want _all_ the nodes!
514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
515 if heads is None:
515 if heads is None:
516 # All nodes are ancestors, so the latest ancestor is the last
516 # All nodes are ancestors, so the latest ancestor is the last
517 # node.
517 # node.
518 highestrev = len(self) - 1
518 highestrev = len(self) - 1
519 # Set ancestors to None to signal that every node is an ancestor.
519 # Set ancestors to None to signal that every node is an ancestor.
520 ancestors = None
520 ancestors = None
521 # Set heads to an empty dictionary for later discovery of heads
521 # Set heads to an empty dictionary for later discovery of heads
522 heads = {}
522 heads = {}
523 else:
523 else:
524 heads = list(heads)
524 heads = list(heads)
525 if not heads:
525 if not heads:
526 return nonodes
526 return nonodes
527 ancestors = set()
527 ancestors = set()
528 # Turn heads into a dictionary so we can remove 'fake' heads.
528 # Turn heads into a dictionary so we can remove 'fake' heads.
529 # Also, later we will be using it to filter out the heads we can't
529 # Also, later we will be using it to filter out the heads we can't
530 # find from roots.
530 # find from roots.
531 heads = dict.fromkeys(heads, False)
531 heads = dict.fromkeys(heads, False)
532 # Start at the top and keep marking parents until we're done.
532 # Start at the top and keep marking parents until we're done.
533 nodestotag = set(heads)
533 nodestotag = set(heads)
534 # Remember where the top was so we can use it as a limit later.
534 # Remember where the top was so we can use it as a limit later.
535 highestrev = max([self.rev(n) for n in nodestotag])
535 highestrev = max([self.rev(n) for n in nodestotag])
536 while nodestotag:
536 while nodestotag:
537 # grab a node to tag
537 # grab a node to tag
538 n = nodestotag.pop()
538 n = nodestotag.pop()
539 # Never tag nullid
539 # Never tag nullid
540 if n == nullid:
540 if n == nullid:
541 continue
541 continue
542 # A node's revision number represents its place in a
542 # A node's revision number represents its place in a
543 # topologically sorted list of nodes.
543 # topologically sorted list of nodes.
544 r = self.rev(n)
544 r = self.rev(n)
545 if r >= lowestrev:
545 if r >= lowestrev:
546 if n not in ancestors:
546 if n not in ancestors:
547 # If we are possibly a descendant of one of the roots
547 # If we are possibly a descendant of one of the roots
548 # and we haven't already been marked as an ancestor
548 # and we haven't already been marked as an ancestor
549 ancestors.add(n) # Mark as ancestor
549 ancestors.add(n) # Mark as ancestor
550 # Add non-nullid parents to list of nodes to tag.
550 # Add non-nullid parents to list of nodes to tag.
551 nodestotag.update([p for p in self.parents(n) if
551 nodestotag.update([p for p in self.parents(n) if
552 p != nullid])
552 p != nullid])
553 elif n in heads: # We've seen it before, is it a fake head?
553 elif n in heads: # We've seen it before, is it a fake head?
554 # So it is, real heads should not be the ancestors of
554 # So it is, real heads should not be the ancestors of
555 # any other heads.
555 # any other heads.
556 heads.pop(n)
556 heads.pop(n)
557 if not ancestors:
557 if not ancestors:
558 return nonodes
558 return nonodes
559 # Now that we have our set of ancestors, we want to remove any
559 # Now that we have our set of ancestors, we want to remove any
560 # roots that are not ancestors.
560 # roots that are not ancestors.
561
561
562 # If one of the roots was nullid, everything is included anyway.
562 # If one of the roots was nullid, everything is included anyway.
563 if lowestrev > nullrev:
563 if lowestrev > nullrev:
564 # But, since we weren't, let's recompute the lowest rev to not
564 # But, since we weren't, let's recompute the lowest rev to not
565 # include roots that aren't ancestors.
565 # include roots that aren't ancestors.
566
566
567 # Filter out roots that aren't ancestors of heads
567 # Filter out roots that aren't ancestors of heads
568 roots = [n for n in roots if n in ancestors]
568 roots = [n for n in roots if n in ancestors]
569 # Recompute the lowest revision
569 # Recompute the lowest revision
570 if roots:
570 if roots:
571 lowestrev = min([self.rev(n) for n in roots])
571 lowestrev = min([self.rev(n) for n in roots])
572 else:
572 else:
573 # No more roots? Return empty list
573 # No more roots? Return empty list
574 return nonodes
574 return nonodes
575 else:
575 else:
576 # We are descending from nullid, and don't need to care about
576 # We are descending from nullid, and don't need to care about
577 # any other roots.
577 # any other roots.
578 lowestrev = nullrev
578 lowestrev = nullrev
579 roots = [nullid]
579 roots = [nullid]
580 # Transform our roots list into a set.
580 # Transform our roots list into a set.
581 descendants = set(roots)
581 descendants = set(roots)
582 # Also, keep the original roots so we can filter out roots that aren't
582 # Also, keep the original roots so we can filter out roots that aren't
583 # 'real' roots (i.e. are descended from other roots).
583 # 'real' roots (i.e. are descended from other roots).
584 roots = descendants.copy()
584 roots = descendants.copy()
585 # Our topologically sorted list of output nodes.
585 # Our topologically sorted list of output nodes.
586 orderedout = []
586 orderedout = []
587 # Don't start at nullid since we don't want nullid in our output list,
587 # Don't start at nullid since we don't want nullid in our output list,
588 # and if nullid shows up in descedents, empty parents will look like
588 # and if nullid shows up in descedents, empty parents will look like
589 # they're descendants.
589 # they're descendants.
590 for r in xrange(max(lowestrev, 0), highestrev + 1):
590 for r in xrange(max(lowestrev, 0), highestrev + 1):
591 n = self.node(r)
591 n = self.node(r)
592 isdescendant = False
592 isdescendant = False
593 if lowestrev == nullrev: # Everybody is a descendant of nullid
593 if lowestrev == nullrev: # Everybody is a descendant of nullid
594 isdescendant = True
594 isdescendant = True
595 elif n in descendants:
595 elif n in descendants:
596 # n is already a descendant
596 # n is already a descendant
597 isdescendant = True
597 isdescendant = True
598 # This check only needs to be done here because all the roots
598 # This check only needs to be done here because all the roots
599 # will start being marked is descendants before the loop.
599 # will start being marked is descendants before the loop.
600 if n in roots:
600 if n in roots:
601 # If n was a root, check if it's a 'real' root.
601 # If n was a root, check if it's a 'real' root.
602 p = tuple(self.parents(n))
602 p = tuple(self.parents(n))
603 # If any of its parents are descendants, it's not a root.
603 # If any of its parents are descendants, it's not a root.
604 if (p[0] in descendants) or (p[1] in descendants):
604 if (p[0] in descendants) or (p[1] in descendants):
605 roots.remove(n)
605 roots.remove(n)
606 else:
606 else:
607 p = tuple(self.parents(n))
607 p = tuple(self.parents(n))
608 # A node is a descendant if either of its parents are
608 # A node is a descendant if either of its parents are
609 # descendants. (We seeded the dependents list with the roots
609 # descendants. (We seeded the dependents list with the roots
610 # up there, remember?)
610 # up there, remember?)
611 if (p[0] in descendants) or (p[1] in descendants):
611 if (p[0] in descendants) or (p[1] in descendants):
612 descendants.add(n)
612 descendants.add(n)
613 isdescendant = True
613 isdescendant = True
614 if isdescendant and ((ancestors is None) or (n in ancestors)):
614 if isdescendant and ((ancestors is None) or (n in ancestors)):
615 # Only include nodes that are both descendants and ancestors.
615 # Only include nodes that are both descendants and ancestors.
616 orderedout.append(n)
616 orderedout.append(n)
617 if (ancestors is not None) and (n in heads):
617 if (ancestors is not None) and (n in heads):
618 # We're trying to figure out which heads are reachable
618 # We're trying to figure out which heads are reachable
619 # from roots.
619 # from roots.
620 # Mark this head as having been reached
620 # Mark this head as having been reached
621 heads[n] = True
621 heads[n] = True
622 elif ancestors is None:
622 elif ancestors is None:
623 # Otherwise, we're trying to discover the heads.
623 # Otherwise, we're trying to discover the heads.
624 # Assume this is a head because if it isn't, the next step
624 # Assume this is a head because if it isn't, the next step
625 # will eventually remove it.
625 # will eventually remove it.
626 heads[n] = True
626 heads[n] = True
627 # But, obviously its parents aren't.
627 # But, obviously its parents aren't.
628 for p in self.parents(n):
628 for p in self.parents(n):
629 heads.pop(p, None)
629 heads.pop(p, None)
630 heads = [n for n, flag in heads.iteritems() if flag]
630 heads = [n for n, flag in heads.iteritems() if flag]
631 roots = list(roots)
631 roots = list(roots)
632 assert orderedout
632 assert orderedout
633 assert roots
633 assert roots
634 assert heads
634 assert heads
635 return (orderedout, roots, heads)
635 return (orderedout, roots, heads)
636
636
637 def headrevs(self):
637 def headrevs(self):
638 count = len(self)
638 count = len(self)
639 if not count:
639 if not count:
640 return [nullrev]
640 return [nullrev]
641 ishead = [1] * (count + 1)
641 ishead = [1] * (count + 1)
642 index = self.index
642 index = self.index
643 for r in xrange(count):
643 for r in xrange(count):
644 e = index[r]
644 e = index[r]
645 ishead[e[5]] = ishead[e[6]] = 0
645 ishead[e[5]] = ishead[e[6]] = 0
646 return [r for r in xrange(count) if ishead[r]]
646 return [r for r in xrange(count) if ishead[r]]
647
647
648 def heads(self, start=None, stop=None):
648 def heads(self, start=None, stop=None):
649 """return the list of all nodes that have no children
649 """return the list of all nodes that have no children
650
650
651 if start is specified, only heads that are descendants of
651 if start is specified, only heads that are descendants of
652 start will be returned
652 start will be returned
653 if stop is specified, it will consider all the revs from stop
653 if stop is specified, it will consider all the revs from stop
654 as if they had no children
654 as if they had no children
655 """
655 """
656 if start is None and stop is None:
656 if start is None and stop is None:
657 if not len(self):
657 if not len(self):
658 return [nullid]
658 return [nullid]
659 return [self.node(r) for r in self.headrevs()]
659 return [self.node(r) for r in self.headrevs()]
660
660
661 if start is None:
661 if start is None:
662 start = nullid
662 start = nullid
663 if stop is None:
663 if stop is None:
664 stop = []
664 stop = []
665 stoprevs = set([self.rev(n) for n in stop])
665 stoprevs = set([self.rev(n) for n in stop])
666 startrev = self.rev(start)
666 startrev = self.rev(start)
667 reachable = set((startrev,))
667 reachable = set((startrev,))
668 heads = set((startrev,))
668 heads = set((startrev,))
669
669
670 parentrevs = self.parentrevs
670 parentrevs = self.parentrevs
671 for r in xrange(startrev + 1, len(self)):
671 for r in xrange(startrev + 1, len(self)):
672 for p in parentrevs(r):
672 for p in parentrevs(r):
673 if p in reachable:
673 if p in reachable:
674 if r not in stoprevs:
674 if r not in stoprevs:
675 reachable.add(r)
675 reachable.add(r)
676 heads.add(r)
676 heads.add(r)
677 if p in heads and p not in stoprevs:
677 if p in heads and p not in stoprevs:
678 heads.remove(p)
678 heads.remove(p)
679
679
680 return [self.node(r) for r in heads]
680 return [self.node(r) for r in heads]
681
681
682 def children(self, node):
682 def children(self, node):
683 """find the children of a given node"""
683 """find the children of a given node"""
684 c = []
684 c = []
685 p = self.rev(node)
685 p = self.rev(node)
686 for r in range(p + 1, len(self)):
686 for r in range(p + 1, len(self)):
687 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
687 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
688 if prevs:
688 if prevs:
689 for pr in prevs:
689 for pr in prevs:
690 if pr == p:
690 if pr == p:
691 c.append(self.node(r))
691 c.append(self.node(r))
692 elif p == nullrev:
692 elif p == nullrev:
693 c.append(self.node(r))
693 c.append(self.node(r))
694 return c
694 return c
695
695
696 def descendant(self, start, end):
696 def descendant(self, start, end):
697 if start == nullrev:
697 if start == nullrev:
698 return True
698 return True
699 for i in self.descendants(start):
699 for i in self.descendants(start):
700 if i == end:
700 if i == end:
701 return True
701 return True
702 elif i > end:
702 elif i > end:
703 break
703 break
704 return False
704 return False
705
705
706 def ancestor(self, a, b):
706 def ancestor(self, a, b):
707 """calculate the least common ancestor of nodes a and b"""
707 """calculate the least common ancestor of nodes a and b"""
708
708
709 # fast path, check if it is a descendant
709 # fast path, check if it is a descendant
710 a, b = self.rev(a), self.rev(b)
710 a, b = self.rev(a), self.rev(b)
711 start, end = sorted((a, b))
711 start, end = sorted((a, b))
712 if self.descendant(start, end):
712 if self.descendant(start, end):
713 return self.node(start)
713 return self.node(start)
714
714
715 def parents(rev):
715 def parents(rev):
716 return [p for p in self.parentrevs(rev) if p != nullrev]
716 return [p for p in self.parentrevs(rev) if p != nullrev]
717
717
718 c = ancestor.ancestor(a, b, parents)
718 c = ancestor.ancestor(a, b, parents)
719 if c is None:
719 if c is None:
720 return nullid
720 return nullid
721
721
722 return self.node(c)
722 return self.node(c)
723
723
724 def _match(self, id):
724 def _match(self, id):
725 if isinstance(id, (long, int)):
725 if isinstance(id, (long, int)):
726 # rev
726 # rev
727 return self.node(id)
727 return self.node(id)
728 if len(id) == 20:
728 if len(id) == 20:
729 # possibly a binary node
729 # possibly a binary node
730 # odds of a binary node being all hex in ASCII are 1 in 10**25
730 # odds of a binary node being all hex in ASCII are 1 in 10**25
731 try:
731 try:
732 node = id
732 node = id
733 self.rev(node) # quick search the index
733 self.rev(node) # quick search the index
734 return node
734 return node
735 except LookupError:
735 except LookupError:
736 pass # may be partial hex id
736 pass # may be partial hex id
737 try:
737 try:
738 # str(rev)
738 # str(rev)
739 rev = int(id)
739 rev = int(id)
740 if str(rev) != id:
740 if str(rev) != id:
741 raise ValueError
741 raise ValueError
742 if rev < 0:
742 if rev < 0:
743 rev = len(self) + rev
743 rev = len(self) + rev
744 if rev < 0 or rev >= len(self):
744 if rev < 0 or rev >= len(self):
745 raise ValueError
745 raise ValueError
746 return self.node(rev)
746 return self.node(rev)
747 except (ValueError, OverflowError):
747 except (ValueError, OverflowError):
748 pass
748 pass
749 if len(id) == 40:
749 if len(id) == 40:
750 try:
750 try:
751 # a full hex nodeid?
751 # a full hex nodeid?
752 node = bin(id)
752 node = bin(id)
753 self.rev(node)
753 self.rev(node)
754 return node
754 return node
755 except (TypeError, LookupError):
755 except (TypeError, LookupError):
756 pass
756 pass
757
757
758 def _partialmatch(self, id):
758 def _partialmatch(self, id):
759 try:
760 return self.index.partialmatch(id)
761 except RevlogError:
762 # parsers.c radix tree lookup gave multiple matches
763 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
764 except (AttributeError, ValueError):
765 # we are pure python, or key was too short to search radix tree
766 pass
767
759 if id in self._pcache:
768 if id in self._pcache:
760 return self._pcache[id]
769 return self._pcache[id]
761
770
762 if len(id) < 40:
771 if len(id) < 40:
763 try:
772 try:
764 # hex(node)[:...]
773 # hex(node)[:...]
765 l = len(id) // 2 # grab an even number of digits
774 l = len(id) // 2 # grab an even number of digits
766 prefix = bin(id[:l * 2])
775 prefix = bin(id[:l * 2])
767 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
776 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
768 nl = [n for n in nl if hex(n).startswith(id)]
777 nl = [n for n in nl if hex(n).startswith(id)]
769 if len(nl) > 0:
778 if len(nl) > 0:
770 if len(nl) == 1:
779 if len(nl) == 1:
771 self._pcache[id] = nl[0]
780 self._pcache[id] = nl[0]
772 return nl[0]
781 return nl[0]
773 raise LookupError(id, self.indexfile,
782 raise LookupError(id, self.indexfile,
774 _('ambiguous identifier'))
783 _('ambiguous identifier'))
775 return None
784 return None
776 except TypeError:
785 except TypeError:
777 pass
786 pass
778
787
779 def lookup(self, id):
788 def lookup(self, id):
780 """locate a node based on:
789 """locate a node based on:
781 - revision number or str(revision number)
790 - revision number or str(revision number)
782 - nodeid or subset of hex nodeid
791 - nodeid or subset of hex nodeid
783 """
792 """
784 n = self._match(id)
793 n = self._match(id)
785 if n is not None:
794 if n is not None:
786 return n
795 return n
787 n = self._partialmatch(id)
796 n = self._partialmatch(id)
788 if n:
797 if n:
789 return n
798 return n
790
799
791 raise LookupError(id, self.indexfile, _('no match found'))
800 raise LookupError(id, self.indexfile, _('no match found'))
792
801
793 def cmp(self, node, text):
802 def cmp(self, node, text):
794 """compare text with a given file revision
803 """compare text with a given file revision
795
804
796 returns True if text is different than what is stored.
805 returns True if text is different than what is stored.
797 """
806 """
798 p1, p2 = self.parents(node)
807 p1, p2 = self.parents(node)
799 return hash(text, p1, p2) != node
808 return hash(text, p1, p2) != node
800
809
801 def _addchunk(self, offset, data):
810 def _addchunk(self, offset, data):
802 o, d = self._chunkcache
811 o, d = self._chunkcache
803 # try to add to existing cache
812 # try to add to existing cache
804 if o + len(d) == offset and len(d) + len(data) < _chunksize:
813 if o + len(d) == offset and len(d) + len(data) < _chunksize:
805 self._chunkcache = o, d + data
814 self._chunkcache = o, d + data
806 else:
815 else:
807 self._chunkcache = offset, data
816 self._chunkcache = offset, data
808
817
809 def _loadchunk(self, offset, length):
818 def _loadchunk(self, offset, length):
810 if self._inline:
819 if self._inline:
811 df = self.opener(self.indexfile)
820 df = self.opener(self.indexfile)
812 else:
821 else:
813 df = self.opener(self.datafile)
822 df = self.opener(self.datafile)
814
823
815 readahead = max(65536, length)
824 readahead = max(65536, length)
816 df.seek(offset)
825 df.seek(offset)
817 d = df.read(readahead)
826 d = df.read(readahead)
818 df.close()
827 df.close()
819 self._addchunk(offset, d)
828 self._addchunk(offset, d)
820 if readahead > length:
829 if readahead > length:
821 return util.buffer(d, 0, length)
830 return util.buffer(d, 0, length)
822 return d
831 return d
823
832
824 def _getchunk(self, offset, length):
833 def _getchunk(self, offset, length):
825 o, d = self._chunkcache
834 o, d = self._chunkcache
826 l = len(d)
835 l = len(d)
827
836
828 # is it in the cache?
837 # is it in the cache?
829 cachestart = offset - o
838 cachestart = offset - o
830 cacheend = cachestart + length
839 cacheend = cachestart + length
831 if cachestart >= 0 and cacheend <= l:
840 if cachestart >= 0 and cacheend <= l:
832 if cachestart == 0 and cacheend == l:
841 if cachestart == 0 and cacheend == l:
833 return d # avoid a copy
842 return d # avoid a copy
834 return util.buffer(d, cachestart, cacheend - cachestart)
843 return util.buffer(d, cachestart, cacheend - cachestart)
835
844
836 return self._loadchunk(offset, length)
845 return self._loadchunk(offset, length)
837
846
838 def _chunkraw(self, startrev, endrev):
847 def _chunkraw(self, startrev, endrev):
839 start = self.start(startrev)
848 start = self.start(startrev)
840 length = self.end(endrev) - start
849 length = self.end(endrev) - start
841 if self._inline:
850 if self._inline:
842 start += (startrev + 1) * self._io.size
851 start += (startrev + 1) * self._io.size
843 return self._getchunk(start, length)
852 return self._getchunk(start, length)
844
853
845 def _chunk(self, rev):
854 def _chunk(self, rev):
846 return decompress(self._chunkraw(rev, rev))
855 return decompress(self._chunkraw(rev, rev))
847
856
848 def _chunkbase(self, rev):
857 def _chunkbase(self, rev):
849 return self._chunk(rev)
858 return self._chunk(rev)
850
859
851 def _chunkclear(self):
860 def _chunkclear(self):
852 self._chunkcache = (0, '')
861 self._chunkcache = (0, '')
853
862
854 def deltaparent(self, rev):
863 def deltaparent(self, rev):
855 """return deltaparent of the given revision"""
864 """return deltaparent of the given revision"""
856 base = self.index[rev][3]
865 base = self.index[rev][3]
857 if base == rev:
866 if base == rev:
858 return nullrev
867 return nullrev
859 elif self._generaldelta:
868 elif self._generaldelta:
860 return base
869 return base
861 else:
870 else:
862 return rev - 1
871 return rev - 1
863
872
864 def revdiff(self, rev1, rev2):
873 def revdiff(self, rev1, rev2):
865 """return or calculate a delta between two revisions"""
874 """return or calculate a delta between two revisions"""
866 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
875 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
867 return str(self._chunk(rev2))
876 return str(self._chunk(rev2))
868
877
869 return mdiff.textdiff(self.revision(rev1),
878 return mdiff.textdiff(self.revision(rev1),
870 self.revision(rev2))
879 self.revision(rev2))
871
880
872 def revision(self, nodeorrev):
881 def revision(self, nodeorrev):
873 """return an uncompressed revision of a given node or revision
882 """return an uncompressed revision of a given node or revision
874 number.
883 number.
875 """
884 """
876 if isinstance(nodeorrev, int):
885 if isinstance(nodeorrev, int):
877 rev = nodeorrev
886 rev = nodeorrev
878 node = self.node(rev)
887 node = self.node(rev)
879 else:
888 else:
880 node = nodeorrev
889 node = nodeorrev
881 rev = None
890 rev = None
882
891
883 cachedrev = None
892 cachedrev = None
884 if node == nullid:
893 if node == nullid:
885 return ""
894 return ""
886 if self._cache:
895 if self._cache:
887 if self._cache[0] == node:
896 if self._cache[0] == node:
888 return self._cache[2]
897 return self._cache[2]
889 cachedrev = self._cache[1]
898 cachedrev = self._cache[1]
890
899
891 # look up what we need to read
900 # look up what we need to read
892 text = None
901 text = None
893 if rev is None:
902 if rev is None:
894 rev = self.rev(node)
903 rev = self.rev(node)
895
904
896 # check rev flags
905 # check rev flags
897 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
906 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
898 raise RevlogError(_('incompatible revision flag %x') %
907 raise RevlogError(_('incompatible revision flag %x') %
899 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
908 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
900
909
901 # build delta chain
910 # build delta chain
902 chain = []
911 chain = []
903 index = self.index # for performance
912 index = self.index # for performance
904 generaldelta = self._generaldelta
913 generaldelta = self._generaldelta
905 iterrev = rev
914 iterrev = rev
906 e = index[iterrev]
915 e = index[iterrev]
907 while iterrev != e[3] and iterrev != cachedrev:
916 while iterrev != e[3] and iterrev != cachedrev:
908 chain.append(iterrev)
917 chain.append(iterrev)
909 if generaldelta:
918 if generaldelta:
910 iterrev = e[3]
919 iterrev = e[3]
911 else:
920 else:
912 iterrev -= 1
921 iterrev -= 1
913 e = index[iterrev]
922 e = index[iterrev]
914 chain.reverse()
923 chain.reverse()
915 base = iterrev
924 base = iterrev
916
925
917 if iterrev == cachedrev:
926 if iterrev == cachedrev:
918 # cache hit
927 # cache hit
919 text = self._cache[2]
928 text = self._cache[2]
920
929
921 # drop cache to save memory
930 # drop cache to save memory
922 self._cache = None
931 self._cache = None
923
932
924 self._chunkraw(base, rev)
933 self._chunkraw(base, rev)
925 if text is None:
934 if text is None:
926 text = str(self._chunkbase(base))
935 text = str(self._chunkbase(base))
927
936
928 bins = [self._chunk(r) for r in chain]
937 bins = [self._chunk(r) for r in chain]
929 text = mdiff.patches(text, bins)
938 text = mdiff.patches(text, bins)
930
939
931 text = self._checkhash(text, node, rev)
940 text = self._checkhash(text, node, rev)
932
941
933 self._cache = (node, rev, text)
942 self._cache = (node, rev, text)
934 return text
943 return text
935
944
936 def _checkhash(self, text, node, rev):
945 def _checkhash(self, text, node, rev):
937 p1, p2 = self.parents(node)
946 p1, p2 = self.parents(node)
938 if node != hash(text, p1, p2):
947 if node != hash(text, p1, p2):
939 raise RevlogError(_("integrity check failed on %s:%d")
948 raise RevlogError(_("integrity check failed on %s:%d")
940 % (self.indexfile, rev))
949 % (self.indexfile, rev))
941 return text
950 return text
942
951
943 def checkinlinesize(self, tr, fp=None):
952 def checkinlinesize(self, tr, fp=None):
944 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
953 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
945 return
954 return
946
955
947 trinfo = tr.find(self.indexfile)
956 trinfo = tr.find(self.indexfile)
948 if trinfo is None:
957 if trinfo is None:
949 raise RevlogError(_("%s not found in the transaction")
958 raise RevlogError(_("%s not found in the transaction")
950 % self.indexfile)
959 % self.indexfile)
951
960
952 trindex = trinfo[2]
961 trindex = trinfo[2]
953 dataoff = self.start(trindex)
962 dataoff = self.start(trindex)
954
963
955 tr.add(self.datafile, dataoff)
964 tr.add(self.datafile, dataoff)
956
965
957 if fp:
966 if fp:
958 fp.flush()
967 fp.flush()
959 fp.close()
968 fp.close()
960
969
961 df = self.opener(self.datafile, 'w')
970 df = self.opener(self.datafile, 'w')
962 try:
971 try:
963 for r in self:
972 for r in self:
964 df.write(self._chunkraw(r, r))
973 df.write(self._chunkraw(r, r))
965 finally:
974 finally:
966 df.close()
975 df.close()
967
976
968 fp = self.opener(self.indexfile, 'w', atomictemp=True)
977 fp = self.opener(self.indexfile, 'w', atomictemp=True)
969 self.version &= ~(REVLOGNGINLINEDATA)
978 self.version &= ~(REVLOGNGINLINEDATA)
970 self._inline = False
979 self._inline = False
971 for i in self:
980 for i in self:
972 e = self._io.packentry(self.index[i], self.node, self.version, i)
981 e = self._io.packentry(self.index[i], self.node, self.version, i)
973 fp.write(e)
982 fp.write(e)
974
983
975 # if we don't call close, the temp file will never replace the
984 # if we don't call close, the temp file will never replace the
976 # real index
985 # real index
977 fp.close()
986 fp.close()
978
987
979 tr.replace(self.indexfile, trindex * self._io.size)
988 tr.replace(self.indexfile, trindex * self._io.size)
980 self._chunkclear()
989 self._chunkclear()
981
990
982 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
991 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
983 """add a revision to the log
992 """add a revision to the log
984
993
985 text - the revision data to add
994 text - the revision data to add
986 transaction - the transaction object used for rollback
995 transaction - the transaction object used for rollback
987 link - the linkrev data to add
996 link - the linkrev data to add
988 p1, p2 - the parent nodeids of the revision
997 p1, p2 - the parent nodeids of the revision
989 cachedelta - an optional precomputed delta
998 cachedelta - an optional precomputed delta
990 """
999 """
991 node = hash(text, p1, p2)
1000 node = hash(text, p1, p2)
992 if node in self.nodemap:
1001 if node in self.nodemap:
993 return node
1002 return node
994
1003
995 dfh = None
1004 dfh = None
996 if not self._inline:
1005 if not self._inline:
997 dfh = self.opener(self.datafile, "a")
1006 dfh = self.opener(self.datafile, "a")
998 ifh = self.opener(self.indexfile, "a+")
1007 ifh = self.opener(self.indexfile, "a+")
999 try:
1008 try:
1000 return self._addrevision(node, text, transaction, link, p1, p2,
1009 return self._addrevision(node, text, transaction, link, p1, p2,
1001 cachedelta, ifh, dfh)
1010 cachedelta, ifh, dfh)
1002 finally:
1011 finally:
1003 if dfh:
1012 if dfh:
1004 dfh.close()
1013 dfh.close()
1005 ifh.close()
1014 ifh.close()
1006
1015
1007 def _addrevision(self, node, text, transaction, link, p1, p2,
1016 def _addrevision(self, node, text, transaction, link, p1, p2,
1008 cachedelta, ifh, dfh):
1017 cachedelta, ifh, dfh):
1009 """internal function to add revisions to the log
1018 """internal function to add revisions to the log
1010
1019
1011 see addrevision for argument descriptions.
1020 see addrevision for argument descriptions.
1012 invariants:
1021 invariants:
1013 - text is optional (can be None); if not set, cachedelta must be set.
1022 - text is optional (can be None); if not set, cachedelta must be set.
1014 if both are set, they must correspond to eachother.
1023 if both are set, they must correspond to eachother.
1015 """
1024 """
1016 btext = [text]
1025 btext = [text]
1017 def buildtext():
1026 def buildtext():
1018 if btext[0] is not None:
1027 if btext[0] is not None:
1019 return btext[0]
1028 return btext[0]
1020 # flush any pending writes here so we can read it in revision
1029 # flush any pending writes here so we can read it in revision
1021 if dfh:
1030 if dfh:
1022 dfh.flush()
1031 dfh.flush()
1023 ifh.flush()
1032 ifh.flush()
1024 basetext = self.revision(self.node(cachedelta[0]))
1033 basetext = self.revision(self.node(cachedelta[0]))
1025 btext[0] = mdiff.patch(basetext, cachedelta[1])
1034 btext[0] = mdiff.patch(basetext, cachedelta[1])
1026 chk = hash(btext[0], p1, p2)
1035 chk = hash(btext[0], p1, p2)
1027 if chk != node:
1036 if chk != node:
1028 raise RevlogError(_("consistency error in delta"))
1037 raise RevlogError(_("consistency error in delta"))
1029 return btext[0]
1038 return btext[0]
1030
1039
1031 def builddelta(rev):
1040 def builddelta(rev):
1032 # can we use the cached delta?
1041 # can we use the cached delta?
1033 if cachedelta and cachedelta[0] == rev:
1042 if cachedelta and cachedelta[0] == rev:
1034 delta = cachedelta[1]
1043 delta = cachedelta[1]
1035 else:
1044 else:
1036 t = buildtext()
1045 t = buildtext()
1037 ptext = self.revision(self.node(rev))
1046 ptext = self.revision(self.node(rev))
1038 delta = mdiff.textdiff(ptext, t)
1047 delta = mdiff.textdiff(ptext, t)
1039 data = compress(delta)
1048 data = compress(delta)
1040 l = len(data[1]) + len(data[0])
1049 l = len(data[1]) + len(data[0])
1041 if basecache[0] == rev:
1050 if basecache[0] == rev:
1042 chainbase = basecache[1]
1051 chainbase = basecache[1]
1043 else:
1052 else:
1044 chainbase = self.chainbase(rev)
1053 chainbase = self.chainbase(rev)
1045 dist = l + offset - self.start(chainbase)
1054 dist = l + offset - self.start(chainbase)
1046 if self._generaldelta:
1055 if self._generaldelta:
1047 base = rev
1056 base = rev
1048 else:
1057 else:
1049 base = chainbase
1058 base = chainbase
1050 return dist, l, data, base, chainbase
1059 return dist, l, data, base, chainbase
1051
1060
1052 curr = len(self)
1061 curr = len(self)
1053 prev = curr - 1
1062 prev = curr - 1
1054 base = chainbase = curr
1063 base = chainbase = curr
1055 offset = self.end(prev)
1064 offset = self.end(prev)
1056 flags = 0
1065 flags = 0
1057 d = None
1066 d = None
1058 basecache = self._basecache
1067 basecache = self._basecache
1059 p1r, p2r = self.rev(p1), self.rev(p2)
1068 p1r, p2r = self.rev(p1), self.rev(p2)
1060
1069
1061 # should we try to build a delta?
1070 # should we try to build a delta?
1062 if prev != nullrev:
1071 if prev != nullrev:
1063 if self._generaldelta:
1072 if self._generaldelta:
1064 if p1r >= basecache[1]:
1073 if p1r >= basecache[1]:
1065 d = builddelta(p1r)
1074 d = builddelta(p1r)
1066 elif p2r >= basecache[1]:
1075 elif p2r >= basecache[1]:
1067 d = builddelta(p2r)
1076 d = builddelta(p2r)
1068 else:
1077 else:
1069 d = builddelta(prev)
1078 d = builddelta(prev)
1070 else:
1079 else:
1071 d = builddelta(prev)
1080 d = builddelta(prev)
1072 dist, l, data, base, chainbase = d
1081 dist, l, data, base, chainbase = d
1073
1082
1074 # full versions are inserted when the needed deltas
1083 # full versions are inserted when the needed deltas
1075 # become comparable to the uncompressed text
1084 # become comparable to the uncompressed text
1076 if text is None:
1085 if text is None:
1077 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1086 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1078 cachedelta[1])
1087 cachedelta[1])
1079 else:
1088 else:
1080 textlen = len(text)
1089 textlen = len(text)
1081 if d is None or dist > textlen * 2:
1090 if d is None or dist > textlen * 2:
1082 text = buildtext()
1091 text = buildtext()
1083 data = compress(text)
1092 data = compress(text)
1084 l = len(data[1]) + len(data[0])
1093 l = len(data[1]) + len(data[0])
1085 base = chainbase = curr
1094 base = chainbase = curr
1086
1095
1087 e = (offset_type(offset, flags), l, textlen,
1096 e = (offset_type(offset, flags), l, textlen,
1088 base, link, p1r, p2r, node)
1097 base, link, p1r, p2r, node)
1089 self.index.insert(-1, e)
1098 self.index.insert(-1, e)
1090 self.nodemap[node] = curr
1099 self.nodemap[node] = curr
1091
1100
1092 entry = self._io.packentry(e, self.node, self.version, curr)
1101 entry = self._io.packentry(e, self.node, self.version, curr)
1093 if not self._inline:
1102 if not self._inline:
1094 transaction.add(self.datafile, offset)
1103 transaction.add(self.datafile, offset)
1095 transaction.add(self.indexfile, curr * len(entry))
1104 transaction.add(self.indexfile, curr * len(entry))
1096 if data[0]:
1105 if data[0]:
1097 dfh.write(data[0])
1106 dfh.write(data[0])
1098 dfh.write(data[1])
1107 dfh.write(data[1])
1099 dfh.flush()
1108 dfh.flush()
1100 ifh.write(entry)
1109 ifh.write(entry)
1101 else:
1110 else:
1102 offset += curr * self._io.size
1111 offset += curr * self._io.size
1103 transaction.add(self.indexfile, offset, curr)
1112 transaction.add(self.indexfile, offset, curr)
1104 ifh.write(entry)
1113 ifh.write(entry)
1105 ifh.write(data[0])
1114 ifh.write(data[0])
1106 ifh.write(data[1])
1115 ifh.write(data[1])
1107 self.checkinlinesize(transaction, ifh)
1116 self.checkinlinesize(transaction, ifh)
1108
1117
1109 if type(text) == str: # only accept immutable objects
1118 if type(text) == str: # only accept immutable objects
1110 self._cache = (node, curr, text)
1119 self._cache = (node, curr, text)
1111 self._basecache = (curr, chainbase)
1120 self._basecache = (curr, chainbase)
1112 return node
1121 return node
1113
1122
def group(self, nodelist, bundler, reorder=None):
    """Yield a delta group for the given nodes as changegroup chunks
    (strings).

    The first emitted delta is first parent(nodelist[0]) -> nodelist[0];
    the receiver is guaranteed to have that parent because it already has
    all history before these changesets.  When the first parent is
    nullrev, the changegroup starts with a full revision instead.
    """

    # No revisions touched by these changesets: emit only the closer.
    if not nodelist:
        yield bundler.close()
        return

    # For generaldelta revlogs we linearize the revs, which is both much
    # quicker and produces a much smaller bundle.
    if (self._generaldelta and reorder is not False) or reorder:
        dag = dagutil.revlogdag(self)
        revs = dag.linearize(set(self.rev(n) for n in nodelist))
    else:
        revs = sorted(self.rev(n) for n in nodelist)

    # Prepend the first rev's first parent: the opening chunk is a delta
    # against it (or a full revision when it is nullrev).
    revs.insert(0, self.parentrevs(revs[0])[0])

    # Emit each revision as a delta against its predecessor in the list.
    for deltaparent, rev in zip(revs, revs[1:]):
        for chunk in bundler.revchunk(self, rev, deltaparent):
            yield chunk

    yield bundler.close()
1151
1160
def addgroup(self, bundle, linkmapper, transaction):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    bundle - an unbundler object yielding delta chunks
    linkmapper - maps a changeset id ('cs') to a linkrev
    transaction - journal used to register file growth for rollback

    Returns the list of nodes seen in the bundle, including nodes that
    were already present in this revlog and therefore skipped.
    """

    # track the base of the current delta log
    content = []
    node = None

    # Register the current end-of-file offsets with the transaction so a
    # failure mid-group can truncate everything we append below.
    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self.opener(self.indexfile, "a+")
    isize = r * self._io.size
    if self._inline:
        # inline revlog: index and data share one file
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self.opener(self.datafile, "a")

    try:
        # loop through our set of deltas
        chain = None
        while True:
            # 'chain' is the node the next chunk's delta defaults to
            chunkdata = bundle.deltachunk(chain)
            if not chunkdata:
                break
            node = chunkdata['node']
            p1 = chunkdata['p1']
            p2 = chunkdata['p2']
            cs = chunkdata['cs']
            deltabase = chunkdata['deltabase']
            delta = chunkdata['delta']

            content.append(node)

            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                chain = node
                continue

            # Both parents must already be known before we can store the
            # new revision.
            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(p, self.indexfile,
                                      _('unknown parent'))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)
            chain = self._addrevision(node, None, transaction, link,
                                      p1, p2, (baserev, delta), ifh, dfh)
            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self.opener(self.datafile, "a")
                ifh = self.opener(self.indexfile, "a")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return content
1225
1234
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if not len(self):
        return

    # Find the first revision whose linkrev reaches minlink; if none
    # does, there is nothing to strip.
    striprev = None
    for r in self:
        if self.index[r][4] >= minlink:
            striprev = r
            break
    if striprev is None:
        return

    # First truncate the files on disk.
    dataend = self.start(striprev)
    if self._inline:
        # inline: data lives in the index file after the entries
        indexend = dataend + striprev * self._io.size
    else:
        transaction.add(self.datafile, dataend)
        indexend = striprev * self._io.size
    transaction.add(self.indexfile, indexend)

    # Then reset in-memory state to forget the stripped revisions.
    self._cache = None
    self._chunkclear()
    for r in xrange(striprev, len(self)):
        del self.nodemap[self.node(r)]

    del self.index[striprev:-1]
1266
1275
def checksize(self):
    """Compare on-disk file sizes with what the index implies.

    Returns a pair (dd, di): the number of surplus bytes in the data
    file and in the index file respectively (0 when a file is missing
    or matches exactly).
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        df = self.opener(self.datafile)
        df.seek(0, 2)
        datasize = df.tell()
        df.close()
        dd = datasize - expected
    except IOError as inst:
        # a missing data file simply means no surplus data bytes
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        idxf = self.opener(self.indexfile)
        idxf.seek(0, 2)
        indexsize = idxf.tell()
        idxf.close()
        entrysize = self._io.size
        entries = max(0, indexsize // entrysize)
        di = indexsize - (entries * entrysize)
        if self._inline:
            # inline: the index file also carries all revision data, so
            # account for it before computing the leftover byte count
            databytes = sum(max(0, self.length(r)) for r in self)
            dd = 0
            di = indexsize - len(self) * entrysize - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
1303
1312
def files(self):
    """Return the file names backing this revlog: the index file, plus
    the data file when the revlog is not inline."""
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
General Comments 0
You need to be logged in to leave comments. Login now